@@ -45,11 +45,11 @@ type FetchRequest struct {
4545
4646// FetchURL retrieves and processes content from the specified URL
4747func (f * HTTPFetcher ) FetchURL (req * FetchRequest ) (string , error ) {
48- log .Printf ("Fetching URL: %s" , req .URL )
48+ log .Printf ("Fetching URL: %s" , sanitizeLogValue ( req .URL ) )
4949
5050 // Check robots.txt
5151 if ! f .robotsChecker .IsAllowed (req .URL ) {
52- log .Printf ("Access denied by robots.txt for URL: %s" , req .URL )
52+ log .Printf ("Access denied by robots.txt for URL: %s" , sanitizeLogValue ( req .URL ) )
5353 return "" , fmt .Errorf ("access to %s is disallowed by robots.txt" , req .URL )
5454 }
5555
@@ -62,16 +62,23 @@ func (f *HTTPFetcher) FetchURL(req *FetchRequest) (string, error) {
6262 // Apply formatting
6363 formattedContent := f .processor .FormatContent (content , req .StartIndex , req .MaxLength )
6464
65- log .Printf ("Fetch completed successfully for %s, returning %d characters" , req .URL , len (formattedContent ))
65+ log .Printf ("Fetch completed successfully for %s, returning %d characters" , sanitizeLogValue ( req .URL ) , len (formattedContent ))
6666 return formattedContent , nil
6767}
6868
// sanitizeLogValue strips characters that could forge or corrupt log entries
// from s before it is interpolated into a log line.
//
// It drops every C0 control character (which includes \n, \r and ESC), DEL,
// the C1 control range (which contains NEL, U+0085), and the Unicode line and
// paragraph separators U+2028 and U+2029. Ordinary spaces and all other
// printable runes are kept unchanged, so a clean value is returned as-is.
func sanitizeLogValue(s string) string {
	return strings.Map(func(r rune) rune {
		switch {
		case r < 0x20, r >= 0x7f && r <= 0x9f, r == '\u2028', r == '\u2029':
			// A negative result tells strings.Map to drop the rune.
			return -1
		default:
			return r
		}
	}, s)
}
75+
6976// fetchURL retrieves content from the specified URL
7077func (f * HTTPFetcher ) fetchURL (url string , raw bool ) (string , error ) {
7178 // Create HTTP request
7279 req , err := http .NewRequest ("GET" , url , nil )
7380 if err != nil {
74- log .Printf ("Failed to create HTTP request for %s: %v" , url , err )
81+ log .Printf ("Failed to create HTTP request for %s: %v" , sanitizeLogValue ( url ) , err )
7582 return "" , fmt .Errorf ("failed to create request: %v" , err )
7683 }
7784
@@ -80,29 +87,33 @@ func (f *HTTPFetcher) fetchURL(url string, raw bool) (string, error) {
8087 req .Header .Set ("Accept" , "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" )
8188
8289 // Make HTTP request
83- resp , err := f .httpClient .Do (req )
90+ resp , err := f .httpClient .Do (req ) //nolint:gosec // This is a fetch server; fetching user-provided URLs is its core purpose
8491 if err != nil {
85- log .Printf ("HTTP request failed for %s: %v" , url , err )
92+ log .Printf ("HTTP request failed for %s: %v" , sanitizeLogValue ( url ) , err )
8693 return "" , fmt .Errorf ("failed to fetch URL: %v" , err )
8794 }
8895 defer resp .Body .Close ()
8996
90- log .Printf ("HTTP %d response from %s (Content-Type: %s)" , resp .StatusCode , url , resp .Header .Get ("Content-Type" ))
97+ //nolint:gosec // URL sanitized by sanitizeLogValue; gosec can't track custom sanitizers
98+ log .Printf ("HTTP %d response from %s (Content-Type: %s)" ,
99+ resp .StatusCode , sanitizeLogValue (url ), resp .Header .Get ("Content-Type" ))
91100
92101 // Check status code
93102 if resp .StatusCode != http .StatusOK {
94- log .Printf ("Non-200 status code %d for %s: %s" , resp .StatusCode , url , resp .Status )
103+ //nolint:gosec // URL sanitized by sanitizeLogValue; gosec can't track custom sanitizers
104+ log .Printf ("Non-200 status code %d for %s: %s" ,
105+ resp .StatusCode , sanitizeLogValue (url ), resp .Status )
95106 return "" , fmt .Errorf ("HTTP %d: %s" , resp .StatusCode , resp .Status )
96107 }
97108
98109 // Read response body
99110 body , err := io .ReadAll (resp .Body )
100111 if err != nil {
101- log .Printf ("Failed to read response body from %s: %v" , url , err )
112+ log .Printf ("Failed to read response body from %s: %v" , sanitizeLogValue ( url ) , err )
102113 return "" , fmt .Errorf ("failed to read response body: %v" , err )
103114 }
104115
105- log .Printf ("Successfully fetched %d bytes from %s" , len (body ), url )
116+ log .Printf ("Successfully fetched %d bytes from %s" , len (body ), sanitizeLogValue ( url ) )
106117
107118 content := string (body )
108119
0 commit comments