Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
c12c33b
feat: frontend enable beta features
wjiayis Feb 1, 2026
324e602
feat: trigger auto-completion if text before is "\cite{"
wjiayis Feb 1, 2026
9a4b2d4
feat: extract last sentence
wjiayis Feb 1, 2026
4992a04
chore: remove debug logging
wjiayis Feb 1, 2026
5592197
feat: end to end inline suggestion (lots of hardcoding)
wjiayis Feb 1, 2026
bc942c3
chore: minor reformatting
wjiayis Feb 1, 2026
72167c5
chore: minor comment improvement
wjiayis Feb 1, 2026
05abfd7
chore: rename method
wjiayis Feb 1, 2026
d688433
chore: rename method
wjiayis Feb 1, 2026
56260eb
refactor: use abstracted methods
wjiayis Feb 1, 2026
d8fd357
fix: use debug conversation mode
wjiayis Feb 1, 2026
f453065
refactor: move citation method to backend
wjiayis Feb 1, 2026
ac64b91
chore: revert edit package-lock.json
wjiayis Feb 1, 2026
b6cf906
feat: always use gpt-5-nano
wjiayis Feb 1, 2026
13e8553
feat: access docs on backend
wjiayis Feb 3, 2026
672e569
feat: get bibfiles from backend
wjiayis Feb 3, 2026
99243ca
Merge pull request #109 from wjiayis/feat/tab-completion
wjiayis Feb 4, 2026
1ff6a69
feat: improve citation prompt
wjiayis Feb 4, 2026
888b66b
feat: improve citation prompt
wjiayis Feb 4, 2026
60421cb
feat: override default overleaf autocomplete
wjiayis Feb 4, 2026
a54d354
refactor: make suggestion triggers generalised
wjiayis Feb 5, 2026
65cf022
feat: use gpt-5.2 instead of gpt-5-nano to reduce latency
wjiayis Feb 5, 2026
2a00ddc
feat: move bib to the start to make use of prompt caching
wjiayis Feb 6, 2026
71e7c3f
feat: skip unimportant bib fields
wjiayis Feb 6, 2026
f049902
chore: remove debug log
wjiayis Feb 6, 2026
cc43f36
chore: update comments
wjiayis Feb 7, 2026
2faa944
chore: removal of redundant code
wjiayis Feb 7, 2026
21d89f6
chore: update comments
wjiayis Feb 7, 2026
8b7ae4c
chore: llm response edge case handling
wjiayis Feb 7, 2026
dc79e9c
chore: update comments
wjiayis Feb 7, 2026
0dacb49
refactor: update comments and improve variable naming
wjiayis Feb 7, 2026
5f6f7b8
refactor: improve variable naming
wjiayis Feb 7, 2026
f1c82af
refactor: remove unnecessary abstraction
wjiayis Feb 7, 2026
4f6b8c9
chore: simplify comments
wjiayis Feb 7, 2026
6cdc60f
feat: update frontend settings button text
wjiayis Feb 7, 2026
ebdb3d1
chore: remove a comment
wjiayis Feb 7, 2026
bae5318
feat: skip @String{} to save tokens and reduce latency
wjiayis Feb 7, 2026
90140df
fix: debug bibliography and sentence ordering
wjiayis Feb 9, 2026
aeaff3c
chore: fix typo in comment
wjiayis Feb 9, 2026
74afd87
refactor: backend return a list of citations instead of a comma-separ…
wjiayis Feb 10, 2026
7d8280d
feat: last sentence - add max length and fallback
wjiayis Feb 10, 2026
008d283
feat: get_citation_keys input validation
wjiayis Feb 10, 2026
69703f0
fix: remove unused optional model_slug in GetCitationKeysRequest
wjiayis Feb 10, 2026
26be433
feat: suppress more specific default overleaf elements
wjiayis Feb 11, 2026
93861bf
feat: add citation key test cases
wjiayis Feb 12, 2026
17b819a
feat: ignore error toast
wjiayis Feb 12, 2026
66b61c1
feat: use paper abstracts
wjiayis Feb 13, 2026
894474c
refactor: post -> get method
wjiayis Feb 14, 2026
a0dd1f6
refactor: rename enableCompletion to enableCitationSuggestion
wjiayis Feb 15, 2026
ad9d72a
feat: add more test cases
wjiayis Feb 15, 2026
05ef229
refactor: extract XtraMCPServices for session-less REST APIs
wjiayis Feb 15, 2026
8f1784f
feat: add more test cases
wjiayis Feb 15, 2026
c295dc3
feat: use batch API to get abstracts
wjiayis Feb 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,6 @@ overleaf.kubeconfig
# coverage report
coverage.out
coverage.html

# claude code
CLAUDE.md
51 changes: 51 additions & 0 deletions internal/api/chat/get_citation_keys.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package chat

import (
"context"

"paperdebugger/internal/libs/contextutil"
"paperdebugger/internal/libs/shared"
"paperdebugger/internal/models"
chatv2 "paperdebugger/pkg/gen/api/chat/v2"
)

// GetCitationKeys handles the GetCitationKeys RPC.
//
// It rejects requests with an empty sentence or project ID, resolves the
// calling actor from ctx, loads that actor's settings, and asks the AI client
// for citation keys matching the sentence, using the OpenAI API key stored in
// the actor's settings. Errors from any of these steps are returned unchanged.
func (s *ChatServerV2) GetCitationKeys(
	ctx context.Context,
	req *chatv2.GetCitationKeysRequest,
) (*chatv2.GetCitationKeysResponse, error) {
	sentence, projectID := req.GetSentence(), req.GetProjectId()

	// Validate in a fixed order so a request missing both fields always
	// reports the sentence first.
	switch {
	case sentence == "":
		return nil, shared.ErrBadRequest("sentence is required")
	case projectID == "":
		return nil, shared.ErrBadRequest("project_id is required")
	}

	actor, err := contextutil.GetActor(ctx)
	if err != nil {
		return nil, err
	}

	userSettings, err := s.userService.GetUserSettings(ctx, actor.ID)
	if err != nil {
		return nil, err
	}

	// Only the API key is taken from the user's settings.
	providerCfg := &models.LLMProviderConfig{APIKey: userSettings.OpenAIAPIKey}

	keys, err := s.aiClientV2.GetCitationKeys(ctx, sentence, actor.ID, projectID, providerCfg)
	if err != nil {
		return nil, err
	}

	return &chatv2.GetCitationKeysResponse{CitationKeys: keys}, nil
}
4 changes: 2 additions & 2 deletions internal/api/mapper/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ func MapProtoSettingsToModel(settings *userv1.Settings) *models.Settings {
return &models.Settings{
ShowShortcutsAfterSelection: settings.ShowShortcutsAfterSelection,
FullWidthPaperDebuggerButton: settings.FullWidthPaperDebuggerButton,
EnableCompletion: settings.EnableCompletion,
EnableCitationSuggestion: settings.EnableCitationSuggestion,
FullDocumentRag: settings.FullDocumentRag,
ShowedOnboarding: settings.ShowedOnboarding,
OpenAIAPIKey: settings.OpenaiApiKey,
Expand All @@ -20,7 +20,7 @@ func MapModelSettingsToProto(settings *models.Settings) *userv1.Settings {
return &userv1.Settings{
ShowShortcutsAfterSelection: settings.ShowShortcutsAfterSelection,
FullWidthPaperDebuggerButton: settings.FullWidthPaperDebuggerButton,
EnableCompletion: settings.EnableCompletion,
EnableCitationSuggestion: settings.EnableCitationSuggestion,
FullDocumentRag: settings.FullDocumentRag,
ShowedOnboarding: settings.ShowedOnboarding,
OpenaiApiKey: settings.OpenAIAPIKey,
Expand Down
2 changes: 1 addition & 1 deletion internal/models/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import "go.mongodb.org/mongo-driver/v2/bson"
type Settings struct {
ShowShortcutsAfterSelection bool `bson:"show_shortcuts_after_selection"`
FullWidthPaperDebuggerButton bool `bson:"full_width_paper_debugger_button"`
EnableCompletion bool `bson:"enable_completion"`
EnableCitationSuggestion bool `bson:"enable_citation_suggestion"`
FullDocumentRag bool `bson:"full_document_rag"`
ShowedOnboarding bool `bson:"showed_onboarding"`
OpenAIAPIKey string `bson:"openai_api_key"`
Expand Down
272 changes: 272 additions & 0 deletions internal/services/toolkit/client/get_citation_keys.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
package client

// TODO: This file should not be placed in the client package.
import (
"context"
"fmt"
"paperdebugger/internal/models"
"paperdebugger/internal/services/toolkit/tools/xtramcp"
"regexp"
"strings"

"github.com/openai/openai-go/v3"
"go.mongodb.org/mongo-driver/v2/bson"
)

var (
	// Regex patterns, compiled once at package initialisation.

	// titleFieldRe matches the "title = " prefix of a BibTeX title field
	// (case-insensitive). The leading \b stops it from matching inside longer
	// field names such as "booktitle", "subtitle" or "shorttitle", which would
	// otherwise be mistaken for the title when they appear first in an entry.
	titleFieldRe = regexp.MustCompile(`(?i)\btitle\s*=\s*`)
	// entryStartRe matches the start of an entry, eg. @article{
	entryStartRe = regexp.MustCompile(`(?i)^\s*@(\w+)\s*\{`)
	// stringEntryRe matches the start of a macro definition, eg. @String{
	stringEntryRe = regexp.MustCompile(`(?i)^\s*@String\s*\{`)
	// multiSpaceRe matches runs of two or more spaces.
	multiSpaceRe = regexp.MustCompile(` {2,}`)

	// Fields to exclude from bibliography (not useful for citation matching)
	excludedFields = []string{
		"address", "institution", "pages", "eprint", "primaryclass", "volume", "number",
		"edition", "numpages", "articleno", "publisher", "editor", "doi", "url", "acmid",
		"issn", "archivePrefix", "year", "month", "day", "eid", "lastaccessed", "organization",
		"school", "isbn", "mrclass", "mrnumber", "mrreviewer", "type", "order_no", "location",
		"howpublished", "distincturl", "issue_date", "archived", "series", "source",
	}
	// excludeFieldRe matches a line that starts (after optional whitespace) with
	// one of excludedFields followed by "=", case-insensitively.
	excludeFieldRe = regexp.MustCompile(`(?i)^\s*(` + strings.Join(excludedFields, "|") + `)\s*=`)
)

// braceBalance reports how many more '{' than '}' characters s contains.
// The result is negative when closing braces outnumber opening ones.
func braceBalance(s string) int {
	opened := strings.Count(s, "{")
	closed := strings.Count(s, "}")
	return opened - closed
}

// isQuoteUnclosed reports whether s contains an odd number of value-delimiting
// double quotes, i.e. whether a "quoted" BibTeX value is opened (or closed) on
// this line without its counterpart.
//
// Double quotes nested inside braces are not counted: BibTeX requires a literal
// quote inside a quoted value to be brace-protected (as in "K{\"o}nig"), so such
// quotes never delimit the value. Counting them would make a fully closed value
// look unclosed. For strings without braces the result is simply whether the
// number of double quotes is odd.
func isQuoteUnclosed(s string) bool {
	depth := 0
	unclosed := false
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case '{':
			depth++
		case '}':
			depth--
		case '"':
			// depth can be negative when the line also carries the closing
			// brace of an enclosing group; quotes there still delimit.
			if depth <= 0 {
				unclosed = !unclosed
			}
		}
	}
	return unclosed
}

// extractBalancedValue extracts a BibTeX field value (braced or quoted) from s,
// starting at byte offset pos. Leading spaces, tabs and newlines are skipped.
// It is needed for (1) getting the full title (for abstract lookup) and
// (2) skipping excluded fields that may span multiple lines.
//
// For a {braced} value the content up to the matching closing brace is
// returned, so nested braces such as {A Study of {COVID-19}} are kept intact.
// For a "quoted" value the content up to the closing quote is returned; quotes
// nested inside braces (e.g. "Sch{\"o}n") are part of the value and do not
// terminate it.
//
// Returns the content without its delimiters and the offset just past the
// closing delimiter. Returns "" and -1 if pos is out of range, the value does
// not start with '{' or '"', or the closing delimiter is missing.
func extractBalancedValue(s string, pos int) (string, int) {
	if pos < 0 {
		return "", -1
	}
	// Skip whitespace
	for pos < len(s) && (s[pos] == ' ' || s[pos] == '\t' || s[pos] == '\n' || s[pos] == '\r') {
		pos++
	}
	if pos >= len(s) {
		return "", -1
	}

	opener := s[pos]
	if opener != '{' && opener != '"' {
		return "", -1
	}

	start := pos + 1
	depth := 0 // nesting depth of braces inside the value
	for i := start; i < len(s); i++ {
		switch s[i] {
		case '{':
			depth++
		case '}':
			if opener == '{' && depth == 0 {
				return s[start:i], i + 1
			}
			depth--
		case '"':
			// Only a quote outside any brace group closes a quoted value.
			if opener == '"' && depth <= 0 {
				return s[start:i], i + 1
			}
		}
	}
	return "", -1
}

// extractTitle returns the whitespace-trimmed value of the first field in entry
// matched by titleFieldRe, or "" when no such field is present or its value
// cannot be extracted. Nested braces, as in title = {A Study of {COVID-19}},
// are preserved in the result.
func extractTitle(entry string) string {
	if span := titleFieldRe.FindStringIndex(entry); span != nil {
		// span[1] is the offset right after "title =", where the value begins.
		value, _ := extractBalancedValue(entry, span[1])
		return strings.TrimSpace(value)
	}
	return ""
}

// parseBibFile extracts bibliography entries from a .bib file's lines,
// filtering out blank lines, % comment lines, @String macros, and excluded
// fields (url, doi, etc. — see excludedFields).
//
// Each returned element is one entry, its kept lines joined with "\n". nil is
// returned when no entry is found.
//
// Skipped values may span several lines; they are tracked by brace/quote
// balance:
//   - skipBraces > 0: inside a skipped {braced} value, wait until balanced
//   - skipQuotes = true: inside a skipped "quoted" value, wait for closing quote
//
// The value delimiter of an excluded field is detected from the first
// non-blank character after "=", so "url={", "url = {" and "url   =\t{" are all
// handled alike. If a skipped line also carries the closing brace of the entry
// itself (e.g. `year = {2020}}`), a lone "}" line is emitted in its place so the
// entry stays well-formed and is closed at that point.
func parseBibFile(lines []string) []string {
	var (
		entries      []string
		currentEntry []string

		entryDepth int  // brace depth for current entry (0 = entry complete)
		skipBraces int  // > 0 means we're skipping lines until braces balance
		skipQuotes bool // true means we're skipping lines until closing quote
	)

	// flushEntry emits the entry being built, if any.
	flushEntry := func() {
		if len(currentEntry) > 0 {
			entries = append(entries, strings.Join(currentEntry, "\n"))
			currentEntry = nil
		}
	}

	// closeFromSkipped accounts for surplus closing braces (balance < 0) found
	// on a skipped line: they belong to the enclosing entry, so they lower its
	// depth and, once it reaches zero, end it with a restored "}" line.
	closeFromSkipped := func(balance int) {
		if balance >= 0 || len(currentEntry) == 0 {
			return
		}
		entryDepth += balance
		if entryDepth <= 0 {
			currentEntry = append(currentEntry, "}")
			flushEntry()
		}
	}

	for _, line := range lines {
		// Skip empty lines and comments
		trimmed := strings.TrimSpace(line)
		if trimmed == "" || strings.HasPrefix(trimmed, "%") {
			continue
		}

		// If skipping a multi-line {bracketed} field value, keep skipping until balanced
		if skipBraces > 0 {
			skipBraces += braceBalance(line)
			if skipBraces < 0 {
				closeFromSkipped(skipBraces)
				skipBraces = 0
			}
			continue
		}

		// If skipping a multi-line "quoted" field value, keep skipping until closing quote
		if skipQuotes {
			if isQuoteUnclosed(line) { // odd quote count = found closing quote
				skipQuotes = false
				// Anything after the closing quote may close the entry.
				closeFromSkipped(braceBalance(line[strings.LastIndex(line, `"`)+1:]))
			}
			continue
		}

		// Skip @String{...} macro definitions
		if stringEntryRe.MatchString(line) {
			if balance := braceBalance(line); balance > 0 {
				skipBraces = balance
			}
			continue
		}

		// Skip excluded fields (url, doi, pages, etc.) - may span multiple lines
		if loc := excludeFieldRe.FindStringIndex(line); loc != nil {
			// loc[1] is just past "="; what follows is the field value.
			value := strings.TrimLeft(line[loc[1]:], " \t")
			if strings.HasPrefix(value, `"`) {
				if isQuoteUnclosed(value) {
					skipQuotes = true
				} else {
					closeFromSkipped(braceBalance(value))
				}
				continue
			}
			if balance := braceBalance(value); balance > 0 {
				skipBraces = balance
			} else {
				closeFromSkipped(balance)
			}
			continue
		}

		// Start of new entry: @article{key, or @book{key, etc.
		if entryStartRe.MatchString(line) {
			flushEntry()
			currentEntry = []string{line}
			entryDepth = braceBalance(line)
			if entryDepth <= 0 {
				// Whole entry on one line: complete it now so later stray
				// lines are not attached to it.
				flushEntry()
			}
			continue
		}

		// Continue building current entry
		if len(currentEntry) > 0 {
			currentEntry = append(currentEntry, line)
			entryDepth += braceBalance(line)
			if entryDepth <= 0 { // entry complete when braces balance
				flushEntry()
			}
		}
	}

	// Last entry if file doesn't end with balanced braces
	flushEntry()
	return entries
}

// fetchAbstracts enriches entries with abstracts from XtraMCP using batch API.
//
// The title of every entry is extracted and all titles are sent in a single
// GetPaperAbstracts call. For each entry whose title comes back as found with
// a non-empty abstract, an "abstract = {...}" field is inserted before the
// entry's last closing brace; trailing whitespace and commas before that brace
// are dropped first so the result never contains a doubled comma.
//
// Enrichment is best-effort: when no entry has a title the service is not
// called, and when the call fails or reports no success the entries are
// returned unchanged. The result always has the same length and order as
// entries and never aliases it.
func (a *AIClientV2) fetchAbstracts(ctx context.Context, entries []string) []string {
	result := make([]string, len(entries))
	copy(result, entries)

	// Extract each title once; entryTitles[i] stays "" for entries without one.
	entryTitles := make([]string, len(entries))
	var titles []string
	for i, entry := range entries {
		if title := extractTitle(entry); title != "" {
			entryTitles[i] = title
			titles = append(titles, title)
		}
	}
	if len(titles) == 0 {
		// Nothing to look up; avoid a pointless network round trip.
		return result
	}

	// Fetch abstracts and build lookup map
	svc := xtramcp.NewXtraMCPServices(a.cfg.XtraMCPURI)
	resp, err := svc.GetPaperAbstracts(ctx, titles)
	if err != nil || !resp.Success {
		// Abstracts are optional context: fall back to the plain entries.
		return result
	}
	abstracts := make(map[string]string, len(titles))
	for _, r := range resp.Results {
		if r.Found {
			abstracts[r.Title] = r.Abstract
		}
	}

	// Enrich entries
	for i, entry := range entries {
		if entryTitles[i] == "" {
			continue
		}
		abstract := abstracts[entryTitles[i]]
		if abstract == "" {
			continue
		}
		pos := strings.LastIndex(entry, "}")
		if pos <= 0 {
			continue
		}
		body := strings.TrimRight(entry[:pos], " \t\r\n,")
		result[i] = body + fmt.Sprintf(",\n abstract = {%s}\n}", abstract)
	}
	return result
}

// GetBibliographyForCitation builds the bibliography text for a project.
//
// It loads the project, parses every document whose path ends in ".bib" with
// parseBibFile, passes the collected entries through fetchAbstracts, joins
// them with newlines and collapses runs of spaces into a single space. An
// error from loading the project is returned unchanged.
func (a *AIClientV2) GetBibliographyForCitation(ctx context.Context, userId bson.ObjectID, projectId string) (string, error) {
	project, err := a.projectService.GetProject(ctx, userId, projectId)
	if err != nil {
		return "", err
	}

	// Collect entries from every .bib document of the project.
	var bibEntries []string
	for _, doc := range project.Docs {
		if !strings.HasSuffix(doc.Filepath, ".bib") {
			continue
		}
		bibEntries = append(bibEntries, parseBibFile(doc.Lines)...)
	}

	// Add abstracts, then flatten to a single space-normalized string.
	enriched := a.fetchAbstracts(ctx, bibEntries)
	joined := strings.Join(enriched, "\n")
	return multiSpaceRe.ReplaceAllString(joined, " "), nil
}

// GetCitationKeys asks the LLM which citation keys from the project's
// bibliography are relevant to sentence.
//
// The bibliography is built with GetBibliographyForCitation. When it is empty
// there is nothing to cite, so no LLM request is made. Otherwise the model is
// prompted to answer with comma-separated keys, or with the sentinel "none".
// The answer is parsed leniently: surrounding whitespace, quotes and backticks
// are stripped from the answer and from each key, and the sentinel is matched
// case-insensitively, with or without a trailing period.
//
// It returns the suggested keys in the order given by the model, or an empty
// non-nil slice when there are none. Errors from building the bibliography or
// from the chat completion are returned unchanged.
func (a *AIClientV2) GetCitationKeys(ctx context.Context, sentence string, userId bson.ObjectID, projectId string, llmProvider *models.LLMProviderConfig) ([]string, error) {
	bibliography, err := a.GetBibliographyForCitation(ctx, userId, projectId)
	if err != nil {
		return nil, err
	}

	// Without any bibliography the model could only answer "none" or invent
	// keys; skip the round trip.
	if strings.TrimSpace(bibliography) == "" {
		return []string{}, nil
	}

	const emptyCitation = "none"

	// Bibliography is placed at the start of the prompt to leverage prompt caching
	message := fmt.Sprintf("Bibliography: %s\nSentence: %s\nBased on the sentence and bibliography, suggest only the most relevant citation keys separated by commas with no spaces (e.g. key1,key2). Be selective and only include citations that are directly relevant. Avoid suggesting more than 3 citations. If no relevant citations are found, return '%s'.", bibliography, sentence, emptyCitation)

	_, resp, err := a.ChatCompletionV2(ctx, "gpt-5.2", OpenAIChatHistory{
		openai.SystemMessage("You are a helpful assistant that suggests relevant citation keys."),
		openai.UserMessage(message),
	}, llmProvider)
	if err != nil {
		return nil, err
	}
	if len(resp) == 0 {
		return []string{}, nil
	}

	// unquote strips whitespace plus any quotes or backticks wrapped around s.
	unquote := func(s string) string {
		return strings.TrimSpace(strings.Trim(strings.TrimSpace(s), "`'\""))
	}

	answer := unquote(resp[0].Payload.GetAssistant().GetContent())

	// The prompt shows the sentinel in quotes, so the model may echo it as
	// 'none', "None" or none. — treat all of those as "no citations".
	if answer == "" || strings.EqualFold(strings.Trim(answer, " \t\r\n`'\"."), emptyCitation) {
		return []string{}, nil
	}

	// Parse comma-separated keys
	parts := strings.Split(answer, ",")
	result := make([]string, 0, len(parts))
	for _, part := range parts {
		if key := unquote(part); key != "" {
			result = append(result, key)
		}
	}
	return result, nil
}
Loading