Skip to content

Commit 5419955

Browse files
authored
Merge branch 'main' into renovate/github.com-modelcontextprotocol-go-sdk-1.x
2 parents 314a961 + 7418113 commit 5419955

4 files changed

Lines changed: 24 additions & 13 deletions

File tree

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ jobs:
3737
run: task test
3838

3939
- name: Upload build artifacts
40-
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
40+
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
4141
with:
4242
name: fetch-server
4343
path: build/fetch-server

go.mod

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
module github.com/stackloklabs/gofetch
22

3-
go 1.25.6
3+
go 1.25.7
44

55
require (
66
github.com/JohannesKaufmann/html-to-markdown/v2 v2.5.0

pkg/fetcher/fetcher.go

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -45,11 +45,11 @@ type FetchRequest struct {
4545

4646
// FetchURL retrieves and processes content from the specified URL
4747
func (f *HTTPFetcher) FetchURL(req *FetchRequest) (string, error) {
48-
log.Printf("Fetching URL: %s", req.URL)
48+
log.Printf("Fetching URL: %s", sanitizeLogValue(req.URL))
4949

5050
// Check robots.txt
5151
if !f.robotsChecker.IsAllowed(req.URL) {
52-
log.Printf("Access denied by robots.txt for URL: %s", req.URL)
52+
log.Printf("Access denied by robots.txt for URL: %s", sanitizeLogValue(req.URL))
5353
return "", fmt.Errorf("access to %s is disallowed by robots.txt", req.URL)
5454
}
5555

@@ -62,16 +62,23 @@ func (f *HTTPFetcher) FetchURL(req *FetchRequest) (string, error) {
6262
// Apply formatting
6363
formattedContent := f.processor.FormatContent(content, req.StartIndex, req.MaxLength)
6464

65-
log.Printf("Fetch completed successfully for %s, returning %d characters", req.URL, len(formattedContent))
65+
log.Printf("Fetch completed successfully for %s, returning %d characters", sanitizeLogValue(req.URL), len(formattedContent))
6666
return formattedContent, nil
6767
}
6868

69+
// sanitizeLogValue removes newlines and carriage returns to prevent log injection.
70+
func sanitizeLogValue(s string) string {
71+
s = strings.ReplaceAll(s, "\n", "")
72+
s = strings.ReplaceAll(s, "\r", "")
73+
return s
74+
}
75+
6976
// fetchURL retrieves content from the specified URL
7077
func (f *HTTPFetcher) fetchURL(url string, raw bool) (string, error) {
7178
// Create HTTP request
7279
req, err := http.NewRequest("GET", url, nil)
7380
if err != nil {
74-
log.Printf("Failed to create HTTP request for %s: %v", url, err)
81+
log.Printf("Failed to create HTTP request for %s: %v", sanitizeLogValue(url), err)
7582
return "", fmt.Errorf("failed to create request: %v", err)
7683
}
7784

@@ -80,29 +87,33 @@ func (f *HTTPFetcher) fetchURL(url string, raw bool) (string, error) {
8087
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
8188

8289
// Make HTTP request
83-
resp, err := f.httpClient.Do(req)
90+
resp, err := f.httpClient.Do(req) //nolint:gosec // This is a fetch server; fetching user-provided URLs is its core purpose
8491
if err != nil {
85-
log.Printf("HTTP request failed for %s: %v", url, err)
92+
log.Printf("HTTP request failed for %s: %v", sanitizeLogValue(url), err)
8693
return "", fmt.Errorf("failed to fetch URL: %v", err)
8794
}
8895
defer resp.Body.Close()
8996

90-
log.Printf("HTTP %d response from %s (Content-Type: %s)", resp.StatusCode, url, resp.Header.Get("Content-Type"))
97+
//nolint:gosec // URL sanitized by sanitizeLogValue; gosec can't track custom sanitizers
98+
log.Printf("HTTP %d response from %s (Content-Type: %s)",
99+
resp.StatusCode, sanitizeLogValue(url), resp.Header.Get("Content-Type"))
91100

92101
// Check status code
93102
if resp.StatusCode != http.StatusOK {
94-
log.Printf("Non-200 status code %d for %s: %s", resp.StatusCode, url, resp.Status)
103+
//nolint:gosec // URL sanitized by sanitizeLogValue; gosec can't track custom sanitizers
104+
log.Printf("Non-200 status code %d for %s: %s",
105+
resp.StatusCode, sanitizeLogValue(url), resp.Status)
95106
return "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
96107
}
97108

98109
// Read response body
99110
body, err := io.ReadAll(resp.Body)
100111
if err != nil {
101-
log.Printf("Failed to read response body from %s: %v", url, err)
112+
log.Printf("Failed to read response body from %s: %v", sanitizeLogValue(url), err)
102113
return "", fmt.Errorf("failed to read response body: %v", err)
103114
}
104115

105-
log.Printf("Successfully fetched %d bytes from %s", len(body), url)
116+
log.Printf("Successfully fetched %d bytes from %s", len(body), sanitizeLogValue(url))
106117

107118
content := string(body)
108119

pkg/robots/robots.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ func (c *Checker) fetchRobotsContent(parsedURL *url.URL) (string, error) {
5757

5858
req.Header.Set("User-Agent", c.userAgent)
5959

60-
resp, err := c.httpClient.Do(req)
60+
resp, err := c.httpClient.Do(req) //nolint:gosec // Fetching robots.txt for user-provided URLs is expected behavior
6161
if err != nil || resp.StatusCode != 200 {
6262
return "", fmt.Errorf("failed to fetch robots.txt")
6363
}

0 commit comments

Comments
 (0)