feat(llm): add openai chat completions

Congregalis · Congregalis · commit cfec2c999ac9 · 2026-03-19T19:32:22.000+08:00
diff --git a/.env.example b/.env.example
@@ -1,3 +1,17 @@
+# API mode: "responses" (default) or "chat" for Chat Completions API
+# CLI flag: --api-mode=chat
+OPENAI_API_MODE=responses
+
+# Default API key (used as fallback for both modes)
 OPENAI_API_KEY=sk-your-key-here
-# Optional: custom OpenAI-compatible gateway
+# Default base URL (used as fallback for both modes)
 OPENAI_BASE_URL=https://api.openai.com
+
+# Responses API (default mode)
+# Override with mode-specific keys/URLs for cleaner switching
+# OPENAI_RESPONSES_API_KEY=
+# OPENAI_RESPONSES_BASE_URL=
+
+# Chat Completions API (use --api-mode=chat to enable)
+# OPENAI_CHAT_API_KEY=
+# OPENAI_CHAT_BASE_URL=
diff --git a/internal/cli/run.go b/internal/cli/run.go
@@ -71,6 +71,7 @@ type options struct {
 	Refresh      bool
 	Timeout      time.Duration
 	ChunkSize    int
+	APIMode      string
 	ShowHelp     bool
 	SourceURLs   []string
 	setFlags     map[string]bool
@@ -287,8 +288,40 @@ func Run(args []string, stdout io.Writer, stderr io.Writer) error {
 	}
 	applyEnvDefaults(&opts, envValues)
 
-	apiKey := strings.TrimSpace(os.Getenv("OPENAI_API_KEY"))
 	opts.SourceURLs = normalizeSourceURLs(opts.SourceURLs)
+
+	// Determine API mode first (CLI flag > env var > default)
+	apiMode := opts.APIMode
+	if apiMode == "" {
+		apiMode = strings.TrimSpace(os.Getenv("OPENAI_API_MODE"))
+	}
+	if apiMode == "" {
+		apiMode = "responses"
+	}
+
+	// Read API key and base URL based on mode
+	var apiKey, baseURL string
+	switch apiMode {
+	case "chat":
+		apiKey = strings.TrimSpace(os.Getenv("OPENAI_CHAT_API_KEY"))
+		baseURL = strings.TrimSpace(os.Getenv("OPENAI_CHAT_BASE_URL"))
+		if apiKey == "" {
+			apiKey = strings.TrimSpace(os.Getenv("OPENAI_API_KEY"))
+		}
+		if baseURL == "" {
+			baseURL = strings.TrimSpace(os.Getenv("OPENAI_BASE_URL"))
+		}
+	default:
+		apiKey = strings.TrimSpace(os.Getenv("OPENAI_RESPONSES_API_KEY"))
+		baseURL = strings.TrimSpace(os.Getenv("OPENAI_RESPONSES_BASE_URL"))
+		if apiKey == "" {
+			apiKey = strings.TrimSpace(os.Getenv("OPENAI_API_KEY"))
+		}
+		if baseURL == "" {
+			baseURL = strings.TrimSpace(os.Getenv("OPENAI_BASE_URL"))
+		}
+	}
+
 	if apiKey == "" || len(opts.SourceURLs) == 0 {
 		opts, apiKey, err = runOnboarding(opts, envPath, envValues, apiKey, stdout, stderr)
 		if err != nil {
@@ -302,7 +335,6 @@ func Run(args []string, stdout io.Writer, stderr io.Writer) error {
 		return errors.New("at least one URL is required")
 	}
 
-	baseURL := strings.TrimSpace(os.Getenv("OPENAI_BASE_URL"))
 	httpClient := &http.Client{Timeout: opts.Timeout}
 
 	glossaryMap, err := glossary.Load(opts.Glossary)
@@ -341,7 +373,13 @@ func Run(args []string, stdout io.Writer, stderr io.Writer) error {
 		return err
 	}
 
-	openAIClient := openai.NewClient(apiKey, baseURL, httpClient, opts.MaxRetries)
+	var openAIClient openai.Translator
+	switch apiMode {
+	case "chat":
+		openAIClient = openai.NewChatClient(apiKey, baseURL, httpClient, opts.MaxRetries)
+	default:
+		openAIClient = openai.NewClient(apiKey, baseURL, httpClient, opts.MaxRetries)
+	}
 	runCtx, stopSignal := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
 	defer stopSignal()
 	runStart := time.Now()
@@ -515,6 +553,7 @@ func parseFlags(args []string, stderr io.Writer) (options, error) {
 	fs.StringVar(&opts.Glossary, "glossary", "", "Path to glossary JSON map, e.g. {\"term\":\"translation\"}")
 	fs.DurationVar(&opts.Timeout, "timeout", 90*time.Second, "HTTP timeout, e.g. 120s")
 	fs.IntVar(&opts.ChunkSize, "chunk-size", defaultChunkSize, "Target chunk size in characters")
+	fs.StringVar(&opts.APIMode, "api-mode", "", "API mode: responses or chat (default: responses)")
 
 	fs.Usage = func() {
 		fmt.Fprintln(stderr, "Usage: transblog [flags] <url> [url...]")
@@ -1593,7 +1632,7 @@ func pairsToCachedPairs(items []pair) []cachedPair {
 func processURL(
 	ctx context.Context,
 	httpClient *http.Client,
-	openAIClient *openai.Client,
+	openAIClient openai.Translator,
 	opts options,
 	glossaryMap map[string]string,
 	prices priceConfig,
@@ -2193,7 +2232,7 @@ type translationResult struct {
 
 func translateAllChunks(
 	ctx context.Context,
-	client *openai.Client,
+	client openai.Translator,
 	model string,
 	glossaryMap map[string]string,
 	tasks []translationTask,
@@ -2360,7 +2399,7 @@ func translateAllChunks(
 
 func translateChunkWithQualityGuard(
 	ctx context.Context,
-	client *openai.Client,
+	client openai.Translator,
 	model string,
 	sourceChunk string,
 	glossaryMap map[string]string,
diff --git a/internal/openai/chat.go b/internal/openai/chat.go
@@ -0,0 +1,258 @@
+package openai
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"math/rand"
+	"net/http"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// ChatClient holds what is needed to call an OpenAI-compatible
+// Chat Completions endpoint: credentials, target URL, transport and the
+// retry budget.
+type ChatClient struct {
+	// apiKey is sent as the Bearer token in the Authorization header.
+	apiKey string
+	// endpoint is the full URL that requests are POSTed to.
+	endpoint string
+	// httpClient performs the HTTP round trips.
+	httpClient *http.Client
+	// maxRetries is the number of extra attempts allowed after the first.
+	maxRetries int
+}
+
+// NewChatClient builds a ChatClient that targets the Chat Completions API.
+//
+// baseURL may be empty or blank, in which case defaultBaseURL is used. Any
+// trailing slashes and a trailing "/v1" segment are stripped before
+// "/v1/chat/completions" is appended, so "https://host", "https://host/",
+// "https://host/v1" and "https://host/v1/" all produce the same endpoint.
+// A negative maxRetries selects defaultMaxRetries. A nil httpClient falls
+// back to http.DefaultClient (which has no timeout) so that the first
+// request cannot dereference a nil pointer; callers that need a timeout
+// should pass their own client.
+func NewChatClient(apiKey string, baseURL string, httpClient *http.Client, maxRetries int) *ChatClient {
+	if strings.TrimSpace(baseURL) == "" {
+		baseURL = defaultBaseURL
+	}
+	// TrimRight removes every trailing slash, not just one; TrimSuffix is
+	// already a no-op when the "/v1" suffix is absent.
+	baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/")
+	baseURL = strings.TrimSuffix(baseURL, "/v1")
+
+	if maxRetries < 0 {
+		maxRetries = defaultMaxRetries
+	}
+	if httpClient == nil {
+		httpClient = http.DefaultClient
+	}
+
+	return &ChatClient{
+		apiKey:     apiKey,
+		endpoint:   baseURL + "/v1/chat/completions",
+		httpClient: httpClient,
+		maxRetries: maxRetries,
+	}
+}
+
+// TranslateMarkdownChunk is the usage-free form of
+// TranslateMarkdownChunkWithUsage: it forwards its arguments unchanged and
+// returns only the translated text and the error.
+func (c *ChatClient) TranslateMarkdownChunk(ctx context.Context, model string, mdChunk string, glossaryMap map[string]string) (string, error) {
+	text, _, err := c.TranslateMarkdownChunkWithUsage(ctx, model, mdChunk, glossaryMap)
+	return text, err
+}
+
+func (c *ChatClient) TranslateMarkdownChunkWithUsage(
+	ctx context.Context,
+	model string,
+	mdChunk string,
+	glossaryMap map[string]string,
+) (string, Usage, error) {
+	return c.translateMarkdownChunk(ctx, model, mdChunk, glossaryMap, false, "")
+}
+
+// TranslateMarkdownChunkStrict is the usage-free form of
+// TranslateMarkdownChunkStrictWithUsage: it forwards its arguments,
+// including failureReason, and returns only the translated text and the
+// error.
+func (c *ChatClient) TranslateMarkdownChunkStrict(
+	ctx context.Context,
+	model string,
+	mdChunk string,
+	glossaryMap map[string]string,
+	failureReason string,
+) (string, error) {
+	text, _, err := c.TranslateMarkdownChunkStrictWithUsage(ctx, model, mdChunk, glossaryMap, failureReason)
+	return text, err
+}
+
+func (c *ChatClient) TranslateMarkdownChunkStrictWithUsage(
+	ctx context.Context,
+	model string,
+	mdChunk string,
+	glossaryMap map[string]string,
+	failureReason string,
+) (string, Usage, error) {
+	return c.translateMarkdownChunk(ctx, model, mdChunk, glossaryMap, true, failureReason)
+}
+
+func (c *ChatClient) translateMarkdownChunk(
+	ctx context.Context,
+	model string,
+	mdChunk string,
+	glossaryMap map[string]string,
+	strict bool,
+	failureReason string,
+) (string, Usage, error) {
+	systemPrompt := buildSystemPrompt(strict)
+	userPrompt := buildUserPrompt(mdChunk, glossaryMap, strict, failureReason)
+
+	// Build messages array for Chat Completions API
+	payload := map[string]any{
+		"model": model,
+		"messages": []map[string]any{
+			{
+				"role":    "system",
+				"content": systemPrompt,
+			},
+			{
+				"role":    "user",
+				"content": userPrompt,
+			},
+		},
+	}
+
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return "", Usage{}, fmt.Errorf("marshal OpenAI request: %w", err)
+	}
+
+	var lastErr error
+	for attempt := 0; attempt <= c.maxRetries; attempt++ {
+		translated, usage, retry, err := c.callChatCompletions(ctx, body)
+		if err == nil {
+			return translated, usage, nil
+		}
+
+		lastErr = err
+		if !retry || attempt == c.maxRetries {
+			break
+		}
+
+		delay := backoffDelay(attempt)
+		select {
+		case <-time.After(delay):
+		case <-ctx.Done():
+			return "", Usage{}, ctx.Err()
+		}
+	}
+
+	if lastErr == nil {
+		lastErr = errors.New("unknown translation error")
+	}
+	return "", Usage{}, lastErr
+}
+
+// callChatCompletions performs a single POST of body to c.endpoint and
+// decodes the reply.
+//
+// On a 2xx status it returns the text from extractChatOutputText and the
+// usage from extractChatUsage. The retry result tells the caller whether
+// another attempt is worthwhile: true for transport and body-read failures
+// and for status 429 or 500 and above; false for every other non-2xx
+// status and for a 2xx reply whose content cannot be extracted. When a
+// retryable status carries a Retry-After header that parseRetryAfter turns
+// into a positive wait, the method sleeps that long before returning; if
+// ctx is done during that sleep it returns ctx.Err() with retry=false.
+func (c *ChatClient) callChatCompletions(ctx context.Context, body []byte) (translated string, usage Usage, retry bool, err error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.endpoint, bytes.NewReader(body))
+	if err != nil {
+		return "", Usage{}, false, fmt.Errorf("build OpenAI request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+c.apiKey)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return "", Usage{}, true, fmt.Errorf("request OpenAI Chat Completions API: %w", err)
+	}
+	defer resp.Body.Close()
+
+	payload, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", Usage{}, true, fmt.Errorf("read OpenAI response body: %w", err)
+	}
+
+	status := resp.StatusCode
+	if status >= 200 && status <= 299 {
+		text, parseErr := extractChatOutputText(payload)
+		if parseErr != nil {
+			return "", Usage{}, false, parseErr
+		}
+		return text, extractChatUsage(payload), false, nil
+	}
+
+	statusErr := fmt.Errorf("OpenAI Chat Completions API status %d: %s", status, parseAPIError(payload))
+	if status != http.StatusTooManyRequests && status < 500 {
+		// Neither rate limiting nor a server-side failure: retrying will not help.
+		return "", Usage{}, false, statusErr
+	}
+
+	// Honor the server's requested pause before handing control back.
+	if wait := parseRetryAfter(resp.Header.Get("Retry-After")); wait > 0 {
+		select {
+		case <-ctx.Done():
+			return "", Usage{}, false, ctx.Err()
+		case <-time.After(wait):
+		}
+	}
+	return "", Usage{}, true, statusErr
+}
+
+// extractChatOutputText pulls the first choice's message content out of a
+// Chat Completions response body and returns it with surrounding
+// whitespace trimmed. It fails when the body is not valid JSON, when the
+// "choices" array is empty or absent, or when the trimmed content is empty.
+func extractChatOutputText(body []byte) (string, error) {
+	type chatChoice struct {
+		Message struct {
+			Content string `json:"content"`
+		} `json:"message"`
+	}
+	var envelope struct {
+		Choices []chatChoice `json:"choices"`
+	}
+
+	if err := json.Unmarshal(body, &envelope); err != nil {
+		return "", fmt.Errorf("parse OpenAI Chat response JSON: %w", err)
+	}
+	if len(envelope.Choices) == 0 {
+		return "", fmt.Errorf("OpenAI Chat response has no choices")
+	}
+
+	// Only the first choice is consulted; any further choices are ignored.
+	text := strings.TrimSpace(envelope.Choices[0].Message.Content)
+	if text == "" {
+		return "", fmt.Errorf("OpenAI Chat response missing message content")
+	}
+	return text, nil
+}
+
+// extractChatUsage reads the "usage" object of a Chat Completions response
+// body and maps prompt_tokens, completion_tokens and total_tokens onto a
+// Usage value with Available set. It returns the zero Usage when the body
+// cannot be decoded or when all three counters are zero.
+func extractChatUsage(body []byte) Usage {
+	var envelope struct {
+		Usage struct {
+			PromptTokens     int64 `json:"prompt_tokens"`
+			CompletionTokens int64 `json:"completion_tokens"`
+			TotalTokens      int64 `json:"total_tokens"`
+		} `json:"usage"`
+	}
+	if json.Unmarshal(body, &envelope) != nil {
+		return Usage{}
+	}
+
+	counts := envelope.Usage
+	reported := counts.PromptTokens != 0 || counts.CompletionTokens != 0 || counts.TotalTokens != 0
+	if !reported {
+		return Usage{}
+	}
+
+	var result Usage
+	result.InputTokens = counts.PromptTokens
+	result.OutputTokens = counts.CompletionTokens
+	result.TotalTokens = counts.TotalTokens
+	result.Available = true
+	return result
+}
+
+func parseRetryAfterChat(value string) time.Duration {
+	value = strings.TrimSpace(value)
+	if value == "" {
+		return 0
+	}
+
+	if seconds, err := strconv.Atoi(value); err == nil && seconds > 0 {
+		return time.Duration(seconds) * time.Second
+	}
+
+	if ts, err := http.ParseTime(value); err == nil {
+		delta := time.Until(ts)
+		if delta > 0 {
+			return delta
+		}
+	}
+
+	return 0
+}
+
+func backoffDelayChat(attempt int) time.Duration {
+	base := time.Second
+	delay := base * time.Duration(1<<attempt)
+	jitter := time.Duration(rand.Intn(250)) * time.Millisecond
+	max := 30 * time.Second
+	if delay+jitter > max {
+		return max
+	}
+	return delay + jitter
+}
diff --git a/internal/openai/responses.go b/internal/openai/responses.go