Skip to content

Commit b6aa8a6

Browse files
committed
Add unit and end-to-end coverage with CI quality gates
1 parent db7b18d commit b6aa8a6

5 files changed

Lines changed: 540 additions & 2 deletions

File tree

.github/workflows/ci.yml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,21 @@ jobs:
3232
- name: Vet
3333
run: go vet ./...
3434

35-
- name: Test
36-
run: go test ./...
35+
- name: Unit test + coverage
36+
run: |
37+
go list ./... | grep -v '^transblog/e2e$' | xargs go test -covermode=atomic -coverprofile=coverage.out
38+
39+
TOTAL=$(go tool cover -func=coverage.out | awk '/^total:/ {gsub("%", "", $3); print $3}')
40+
THRESHOLD=45
41+
awk -v total="$TOTAL" -v threshold="$THRESHOLD" 'BEGIN {
42+
if ((total + 0) < threshold) {
43+
printf("coverage %.2f%% is below threshold %d%%\n", total + 0, threshold)
44+
exit 1
45+
}
46+
}'
47+
48+
- name: Integration tests (e2e)
49+
run: go test ./e2e -count=1
3750

3851
- name: Build
3952
run: go build ./cmd/transblog

e2e/run_e2e_test.go

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
package e2e
2+
3+
import (
4+
"bytes"
5+
"crypto/sha1"
6+
"encoding/hex"
7+
"encoding/json"
8+
"io"
9+
"net/http"
10+
"net/http/httptest"
11+
"os"
12+
"path/filepath"
13+
"strings"
14+
"sync/atomic"
15+
"testing"
16+
17+
"transblog/internal/cli"
18+
)
19+
20+
func TestE2ESingleURLSuccess(t *testing.T) {
21+
contentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
22+
if r.URL.Path != "/post" {
23+
http.NotFound(w, r)
24+
return
25+
}
26+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
27+
_, _ = w.Write([]byte(sampleArticle("Single", "single post body")))
28+
}))
29+
t.Cleanup(contentServer.Close)
30+
31+
openAIServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
32+
if r.URL.Path != "/v1/responses" {
33+
http.NotFound(w, r)
34+
return
35+
}
36+
w.Header().Set("Content-Type", "application/json")
37+
_, _ = io.WriteString(w, `{"output_text":"# Single\n\ntranslated content"}`)
38+
}))
39+
t.Cleanup(openAIServer.Close)
40+
41+
t.Setenv("OPENAI_API_KEY", "test-key")
42+
t.Setenv("OPENAI_BASE_URL", openAIServer.URL)
43+
44+
tmpDir := t.TempDir()
45+
runInWorkingDir(t, tmpDir, func() {
46+
var stdout bytes.Buffer
47+
var stderr bytes.Buffer
48+
sourceURL := contentServer.URL + "/post"
49+
if err := cli.Run([]string{"--chunk-size", "10000", sourceURL}, &stdout, &stderr); err != nil {
50+
t.Fatalf("Run() error = %v; stderr=%s", err, stderr.String())
51+
}
52+
53+
matches, err := filepath.Glob(filepath.Join(tmpDir, "out", "*.md"))
54+
if err != nil {
55+
t.Fatalf("glob output file: %v", err)
56+
}
57+
if len(matches) != 1 {
58+
t.Fatalf("output files len=%d, want 1", len(matches))
59+
}
60+
61+
content, err := os.ReadFile(matches[0])
62+
if err != nil {
63+
t.Fatalf("read output: %v", err)
64+
}
65+
text := string(content)
66+
if !strings.Contains(text, "## Source: Single ("+sourceURL+")") {
67+
t.Fatalf("output missing source metadata: %s", text)
68+
}
69+
if !strings.Contains(text, "translated content") {
70+
t.Fatalf("output missing translated text: %s", text)
71+
}
72+
})
73+
}
74+
75+
func TestE2EMultiURLPartialFailure(t *testing.T) {
76+
contentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
77+
switch r.URL.Path {
78+
case "/ok":
79+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
80+
_, _ = w.Write([]byte(sampleArticle("OK", "content ok")))
81+
case "/bad":
82+
http.NotFound(w, r)
83+
default:
84+
http.NotFound(w, r)
85+
}
86+
}))
87+
t.Cleanup(contentServer.Close)
88+
89+
openAIServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
90+
if r.URL.Path != "/v1/responses" {
91+
http.NotFound(w, r)
92+
return
93+
}
94+
w.Header().Set("Content-Type", "application/json")
95+
_, _ = io.WriteString(w, `{"output_text":"# Success\n\nnormal translated text"}`)
96+
}))
97+
t.Cleanup(openAIServer.Close)
98+
99+
t.Setenv("OPENAI_API_KEY", "test-key")
100+
t.Setenv("OPENAI_BASE_URL", openAIServer.URL)
101+
102+
tmpDir := t.TempDir()
103+
runInWorkingDir(t, tmpDir, func() {
104+
okURL := contentServer.URL + "/ok"
105+
badURL := contentServer.URL + "/bad"
106+
107+
var stdout bytes.Buffer
108+
var stderr bytes.Buffer
109+
err := cli.Run([]string{"--chunk-size", "10000", okURL, badURL}, &stdout, &stderr)
110+
if err == nil {
111+
t.Fatalf("Run() error = nil, want partial-failure error")
112+
}
113+
114+
summaryPath := filepath.Join(tmpDir, "out", "_summary.json")
115+
summaryData, readErr := os.ReadFile(summaryPath)
116+
if readErr != nil {
117+
t.Fatalf("read summary: %v", readErr)
118+
}
119+
120+
var summary struct {
121+
SuccessCount int `json:"success_count"`
122+
FailureCount int `json:"failure_count"`
123+
Results []struct {
124+
SourceURL string `json:"source_url"`
125+
Success bool `json:"success"`
126+
ErrorType string `json:"error_type"`
127+
} `json:"results"`
128+
}
129+
if err := json.Unmarshal(summaryData, &summary); err != nil {
130+
t.Fatalf("unmarshal summary: %v", err)
131+
}
132+
133+
if summary.SuccessCount != 1 || summary.FailureCount != 1 {
134+
t.Fatalf("summary counts = (%d,%d), want (1,1)", summary.SuccessCount, summary.FailureCount)
135+
}
136+
if len(summary.Results) != 2 {
137+
t.Fatalf("summary results len=%d, want 2", len(summary.Results))
138+
}
139+
140+
var sawFetchFailure bool
141+
for _, result := range summary.Results {
142+
if result.SourceURL == badURL {
143+
sawFetchFailure = true
144+
if result.Success {
145+
t.Fatalf("bad URL result marked success")
146+
}
147+
if result.ErrorType != "fetch_failed" {
148+
t.Fatalf("bad URL error_type=%q, want fetch_failed", result.ErrorType)
149+
}
150+
}
151+
}
152+
if !sawFetchFailure {
153+
t.Fatalf("summary missing bad URL result")
154+
}
155+
})
156+
}
157+
158+
func TestE2EResumeAfterInterruptedRun(t *testing.T) {
159+
contentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
160+
if r.URL.Path != "/long" {
161+
http.NotFound(w, r)
162+
return
163+
}
164+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
165+
_, _ = w.Write([]byte(sampleLongArticle("Resume")))
166+
}))
167+
t.Cleanup(contentServer.Close)
168+
169+
var phase int32 = 1
170+
var phase1Calls int32
171+
var phase2Calls int32
172+
openAIServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
173+
if r.URL.Path != "/v1/responses" {
174+
http.NotFound(w, r)
175+
return
176+
}
177+
178+
body, _ := io.ReadAll(r.Body)
179+
if atomic.LoadInt32(&phase) == 1 {
180+
if atomic.AddInt32(&phase1Calls, 1) == 3 {
181+
w.WriteHeader(http.StatusBadRequest)
182+
_, _ = io.WriteString(w, `{"error":{"message":"forced interruption"}}`)
183+
return
184+
}
185+
} else {
186+
atomic.AddInt32(&phase2Calls, 1)
187+
}
188+
189+
sum := sha1.Sum(body)
190+
translated := "# title\n\ntranslated-" + hex.EncodeToString(sum[:8])
191+
encoded, _ := json.Marshal(translated)
192+
w.Header().Set("Content-Type", "application/json")
193+
_, _ = io.WriteString(w, `{"output_text":`+string(encoded)+`}`)
194+
}))
195+
t.Cleanup(openAIServer.Close)
196+
197+
t.Setenv("OPENAI_API_KEY", "test-key")
198+
t.Setenv("OPENAI_BASE_URL", openAIServer.URL)
199+
200+
tmpDir := t.TempDir()
201+
runInWorkingDir(t, tmpDir, func() {
202+
sourceURL := contentServer.URL + "/long"
203+
var stdout bytes.Buffer
204+
var stderr bytes.Buffer
205+
206+
firstErr := cli.Run([]string{"--chunk-size", "60", sourceURL}, &stdout, &stderr)
207+
if firstErr == nil {
208+
t.Fatalf("first Run() error = nil, want interruption error")
209+
}
210+
211+
statePath := filepath.Join(tmpDir, "out", ".transblog.state.json")
212+
stateData, err := os.ReadFile(statePath)
213+
if err != nil {
214+
t.Fatalf("read state file: %v", err)
215+
}
216+
217+
var state struct {
218+
URLs map[string]struct {
219+
ChunkCount int `json:"chunk_count"`
220+
Chunks map[string]struct {
221+
Source string `json:"source"`
222+
Translated string `json:"translated"`
223+
} `json:"chunks"`
224+
} `json:"urls"`
225+
}
226+
if err := json.Unmarshal(stateData, &state); err != nil {
227+
t.Fatalf("unmarshal state file: %v", err)
228+
}
229+
entry, ok := state.URLs[sourceURL]
230+
if !ok {
231+
t.Fatalf("state missing URL entry for %s", sourceURL)
232+
}
233+
savedChunks := len(entry.Chunks)
234+
if savedChunks == 0 || savedChunks >= entry.ChunkCount {
235+
t.Fatalf("saved chunks=%d, chunk_count=%d; want partial save", savedChunks, entry.ChunkCount)
236+
}
237+
238+
atomic.StoreInt32(&phase, 2)
239+
stdout.Reset()
240+
stderr.Reset()
241+
242+
if err := cli.Run([]string{"--chunk-size", "60", sourceURL}, &stdout, &stderr); err != nil {
243+
t.Fatalf("second Run() error = %v; stderr=%s", err, stderr.String())
244+
}
245+
246+
if got, want := int(atomic.LoadInt32(&phase2Calls)), entry.ChunkCount-savedChunks; got != want {
247+
t.Fatalf("resume calls=%d, want %d", got, want)
248+
}
249+
if _, err := os.Stat(statePath); !os.IsNotExist(err) {
250+
t.Fatalf("state file should be removed after completion, stat err=%v", err)
251+
}
252+
})
253+
}
254+
255+
// runInWorkingDir changes the process working directory to dir, calls fn, and
// changes back to the previous directory once fn has finished.
//
// The restore is deferred, so it also happens when fn stops the test with
// t.Fatalf. The working directory is process-wide, so this helper is not
// suitable for tests that run in parallel with each other.
//
// A failure to obtain the current directory or to enter dir aborts the test.
// A failure to restore the original directory is reported with t.Errorf
// instead of being discarded, because every test that runs afterwards would
// otherwise execute in the wrong directory without any diagnostic.
func runInWorkingDir(t *testing.T, dir string, fn func()) {
	t.Helper()

	originalDir, err := os.Getwd()
	if err != nil {
		t.Fatalf("Getwd: %v", err)
	}
	if err := os.Chdir(dir); err != nil {
		t.Fatalf("Chdir(%q): %v", dir, err)
	}
	defer func() {
		if err := os.Chdir(originalDir); err != nil {
			t.Errorf("restore working directory %q: %v", originalDir, err)
		}
	}()

	fn()
}
271+
272+
// sampleArticle returns a minimal HTML document whose <title> and <h1> are
// both title and whose single paragraph is text followed by a fixed filler
// sentence. title and text are inserted verbatim, without HTML escaping.
func sampleArticle(title string, text string) string {
	head := "<!doctype html><html><head><title>" + title + "</title></head>"
	heading := "<h1>" + title + "</h1>"
	paragraph := "<p>" + text + " paragraph with enough length for readability extraction.</p>"
	return head + "<body><article>" + heading + paragraph + "</article></body></html>"
}
275+
276+
// sampleLongArticle returns an HTML document whose <title> and <h1> are both
// title, followed by four fixed paragraphs, each wrapped in its own <p>
// element. title is inserted verbatim, without HTML escaping.
func sampleLongArticle(title string) string {
	paragraphs := []string{
		"This is the first long paragraph to force markdown chunking and resume behavior verification for the integration test suite.",
		"This is the second long paragraph with extra descriptive words so the chunk splitter has enough material to cut into multiple segments.",
		"This is the third long paragraph that forces a controlled interruption in the first run.",
		"This is the fourth long paragraph to ensure there are more chunks than one worker request, making partial completion observable.",
	}

	// Joining on the closing/opening tag pair and wrapping the result once
	// yields one <p>...</p> element per paragraph.
	body := "<p>" + strings.Join(paragraphs, "</p><p>") + "</p>"

	return "<!doctype html><html><head><title>" + title +
		"</title></head><body><article><h1>" + title + "</h1>" +
		body +
		"</article></body></html>"
}

internal/fetch/fetch_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,53 @@ func TestHTMLFallsBackToOriginalBodyWhenExtractionFails(t *testing.T) {
9393
}
9494
}
9595

96+
func TestHTMLReturnsStatusErrorWithBodySnippet(t *testing.T) {
97+
t.Parallel()
98+
99+
const errBody = "missing page"
100+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
101+
w.WriteHeader(http.StatusNotFound)
102+
_, _ = w.Write([]byte(errBody))
103+
}))
104+
t.Cleanup(server.Close)
105+
106+
httpClient := server.Client()
107+
httpClient.Timeout = 5 * time.Second
108+
109+
_, err := HTML(context.Background(), httpClient, server.URL)
110+
if err == nil {
111+
t.Fatalf("HTML() error = nil, want status error")
112+
}
113+
if !strings.Contains(err.Error(), "unexpected status 404") {
114+
t.Fatalf("HTML() error = %v, want status details", err)
115+
}
116+
if !strings.Contains(err.Error(), errBody) {
117+
t.Fatalf("HTML() error = %v, want body snippet %q", err, errBody)
118+
}
119+
}
120+
121+
func TestHTMLReturnsTimeoutError(t *testing.T) {
122+
t.Parallel()
123+
124+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
125+
time.Sleep(200 * time.Millisecond)
126+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
127+
_, _ = w.Write([]byte("<html><head><title>slow</title></head><body>slow</body></html>"))
128+
}))
129+
t.Cleanup(server.Close)
130+
131+
httpClient := server.Client()
132+
httpClient.Timeout = 50 * time.Millisecond
133+
134+
_, err := HTML(context.Background(), httpClient, server.URL)
135+
if err == nil {
136+
t.Fatalf("HTML() error = nil, want timeout")
137+
}
138+
if !strings.Contains(err.Error(), "download URL") {
139+
t.Fatalf("HTML() error = %v, want wrapped download error", err)
140+
}
141+
}
142+
96143
func TestExtractTitle(t *testing.T) {
97144
t.Parallel()
98145

0 commit comments

Comments
 (0)