Skip to content

Commit f95426a

Browse files
authored
feat: Enhance RAG service and unify knowledge retrieval functionality (#425)
* feat: update LightRAG integration and improve RAG service functionality * feat: implement unified retrieval service for knowledge base queries * feat: enhance file handling and RAG processing with utility functions and improved logging * feat: refactor knowledge retrieval logic and unify result structure * feat: enhance KnowledgeBaseDetail with Markdown rendering and syntax highlighting * feat: simplify KnowledgeBaseDetail layout and improve text rendering * feat: simplify KnowledgeBaseDetail layout and improve text rendering * feat: simplify KnowledgeBaseDetail layout and improve text rendering
1 parent 4c5cc9d commit f95426a

26 files changed

Lines changed: 3258 additions & 2675 deletions

frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx

Lines changed: 170 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import type React from "react";
22
import { useEffect, useState } from "react";
33
import { Table, Badge, Button, Breadcrumb, Tooltip, App, Card, Input, Empty, Spin } from "antd";
4+
import ReactMarkdown from "react-markdown";
5+
import remarkGfm from "remark-gfm";
6+
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
7+
import { vscDarkPlus } from "react-syntax-highlighter/dist/esm/styles/prism";
48
import {
59
DeleteOutlined,
610
EditOutlined,
@@ -18,30 +22,41 @@ import {
1822
queryKnowledgeBaseByIdUsingGet,
1923
queryKnowledgeBaseFilesUsingGet,
2024
retrieveKnowledgeBaseContent,
21-
fetchKnowledgeGraph,
25+
queryKnowledgeBase,
2226
} from "../knowledge-base.api";
2327
import useFetchData from "@/hooks/useFetchData";
2428
import AddDataDialog from "../components/AddDataDialog";
2529
import CreateKnowledgeBase from "../components/CreateKnowledgeBase";
2630
import KnowledgeGraphView, { GraphEntitySelection } from "../components/KnowledgeGraphView";
27-
import { Network } from "lucide-react";
2831
import { useTranslation } from "react-i18next";
2932

3033
/**
 * One statistic entry: an optional icon node, a text label, and a value that
 * may be either a string or a number.
 * NOTE(review): where these entries are rendered is not visible in this excerpt.
 */
interface StatisticItem {
3134
icon?: React.ReactNode;
3235
label: string;
3336
value: string | number;
3437
}
35-
interface RagChunk {
38+
// Use UnifiedSearchResult from model - flat structure from backend
39+
// Backend returns: { id, text, score, metadata, resultType, knowledgeBaseId, knowledgeBaseName }
40+
interface RecallResult {
3641
id: string;
3742
text: string;
38-
metadata: string;
39-
}
40-
interface RecallResult {
4143
score: number;
42-
entity: RagChunk;
43-
id?: string | object;
44-
primaryKey?: string;
44+
metadata: Record<string, any>;
45+
resultType?: string;
46+
knowledgeBaseId?: string;
47+
knowledgeBaseName?: string;
48+
}
49+
50+
/**
 * Collapses single ("soft") line breaks into spaces so that each Markdown
 * paragraph ends up on one line, while leaving fenced code blocks untouched.
 *
 * Rules:
 * - Text inside a pair of triple-backtick fences is returned verbatim.
 * - Paragraph breaks (a line break followed by another line break) are kept.
 * - Both LF and CRLF line endings are recognised.
 * - A line break that ends a text segment is kept, so an opening fence that
 *   follows the segment still starts on its own line.
 *
 * @param markdown Raw Markdown text; an empty value yields "".
 * @returns The Markdown with soft line breaks outside fences replaced by spaces.
 */
function squashSoftLineBreaksOutsideFences(markdown: string): string {
  if (!markdown) return "";
  // The capturing group makes split() keep each fenced block as its own part.
  const parts = markdown.split(/(```[\s\S]*?```)/g);
  return parts
    .map((part) => {
      if (part.startsWith("```")) return part;
      // Squash a lone line break into a space, except:
      //  - when another line break follows (paragraph break), or
      //  - when it is the last character of this part: the next part is a
      //    code fence, and a fence only opens at the start of a line.
      // `\r?` consumes the CR of CRLF endings so no stray "\r" is left behind.
      return part.replace(/([^\r\n])\r?\n(?!\r?\n|$)/g, "$1 ");
    })
    .join("");
}
4661

4762
const KnowledgeBaseDetailPage: React.FC = () => {
@@ -153,7 +168,7 @@ const KnowledgeBaseDetailPage: React.FC = () => {
153168
setGraphLoading(true);
154169
setGraphSelection(null);
155170
try {
156-
const { data } = await fetchKnowledgeGraph({ knowledge_base_id: knowledgeBase.id, query: "*" });
171+
const { data } = await queryKnowledgeBase({ knowledge_base_id: knowledgeBase.id, query: "*" });
157172
setGraphData({ nodes: data?.nodes ?? [], edges: data?.edges ?? [] });
158173
} catch {
159174
setGraphData({ nodes: [], edges: [] });
@@ -179,15 +194,6 @@ const KnowledgeBaseDetailPage: React.FC = () => {
179194
};
180195

181196
type DetailOperation = NonNullable<React.ComponentProps<typeof DetailHeader>["operations"][number]>;
182-
const graphOperation: DetailOperation | null = knowledgeBase?.type === KBType.GRAPH
183-
? {
184-
key: "graph",
185-
label: t("knowledgeBase.detail.graph.title"),
186-
icon: <Network />,
187-
onClick: handleOpenGraph,
188-
}
189-
: null;
190-
191197
const baseOperations: DetailOperation[] = [
192198
{
193199
key: "edit",
@@ -221,7 +227,7 @@ const KnowledgeBaseDetailPage: React.FC = () => {
221227
},
222228
];
223229

224-
const operations: DetailOperation[] = [graphOperation, ...baseOperations].filter(Boolean) as DetailOperation[];
230+
const operations: DetailOperation[] = baseOperations;
225231

226232
const fileOps = [
227233
{
@@ -242,7 +248,17 @@ const KnowledgeBaseDetailPage: React.FC = () => {
242248
ellipsis: true,
243249
fixed: "left" as const,
244250
render: (_: unknown, file: KBFile) => (
245-
<a onClick={() => navigate(`/data/knowledge-base/file-detail/${file.id}?knowledgeBaseId=${knowledgeBase?.id || ''}&fileName=${encodeURIComponent(file.name || file.fileName || '')}`)}>
251+
<a
252+
onClick={() => {
253+
if (knowledgeBase?.type === KBType.GRAPH) {
254+
handleOpenGraph();
255+
return;
256+
}
257+
navigate(
258+
`/data/knowledge-base/file-detail/${file.id}?knowledgeBaseId=${knowledgeBase?.id || ""}&fileName=${encodeURIComponent(file.name || file.fileName || "")}`
259+
);
260+
}}
261+
>
246262
{file.name}
247263
</a>
248264
)
@@ -478,16 +494,146 @@ const KnowledgeBaseDetailPage: React.FC = () => {
478494
<Spin className="mt-8" />
479495
) : recallResults.length === 0 ? (
480496
<Empty description={t("knowledgeBase.detail.recallTest.noResult")} />
497+
) : knowledgeBase?.type === KBType.GRAPH ? (
498+
<div className="w-full">
499+
{(() => {
500+
const item = recallResults[0];
501+
if (!item) return null;
502+
return (
503+
<div className="border border-gray-200 rounded-lg bg-white overflow-hidden">
504+
<div className="flex items-center justify-between px-5 py-3 bg-gradient-to-r from-slate-50 to-gray-50 border-b border-gray-200">
505+
<div className="text-xs text-gray-500 font-mono break-all">
506+
ID: {item.id ?? "-"}
507+
</div>
508+
</div>
509+
<div className="p-5">
510+
<div className="prose prose-slate prose-sm max-w-none
511+
prose-headings:text-slate-800 prose-headings:font-semibold
512+
prose-p:text-gray-700 prose-p:leading-relaxed prose-p:m-0
513+
prose-a:text-blue-600 prose-a:no-underline hover:prose-a:underline
514+
prose-strong:text-slate-800 prose-em:text-slate-600
515+
prose-li:text-gray-700
516+
prose-code:before:content-none prose-code:after:content-none
517+
prose-code:bg-slate-100 prose-code:text-rose-600 prose-code:px-1.5 prose-code:py-0.5 prose-code:rounded prose-code:text-sm prose-code:font-medium prose-code:whitespace-nowrap
518+
prose-pre:bg-slate-900 prose-pre:shadow-lg
519+
prose-blockquote:border-l-blue-400 prose-blockquote:bg-slate-50 prose-blockquote:py-1 prose-blockquote:not-italic
520+
prose-table:border-collapse prose-th:bg-slate-100 prose-th:border prose-th:border-slate-300 prose-th:px-3 prose-th:py-2
521+
prose-td:border prose-td:border-slate-200 prose-td:px-3 prose-td:py-2
522+
prose-img:rounded-lg prose-img:shadow-md
523+
prose-hr:border-slate-200">
524+
<ReactMarkdown
525+
remarkPlugins={[remarkGfm]}
526+
components={{
527+
code({ node, inline, className, children, ...props }: any) {
528+
const match = /language-(\w+)/.exec(className || '');
529+
const codeString = String(children).replace(/\n$/, '');
530+
const shouldRenderInline = inline ?? (!match && !codeString.includes("\n"));
531+
532+
if (shouldRenderInline) {
533+
return (
534+
<code className="text-slate-700 bg-slate-100 px-1.5 py-0.5 rounded text-sm font-mono inline" {...props}>
535+
{children}
536+
</code>
537+
);
538+
}
539+
540+
// 有指定语言的代码块才高亮
541+
if (match) {
542+
return (
543+
<SyntaxHighlighter
544+
{...props}
545+
style={vscDarkPlus}
546+
language={match[1]}
547+
PreTag="div"
548+
customStyle={{
549+
borderRadius: '0.5rem',
550+
padding: '1rem',
551+
fontSize: '0.8rem',
552+
margin: '0.5rem 0',
553+
overflow: 'auto',
554+
maxWidth: '100%'
555+
}}
556+
>
557+
{codeString}
558+
</SyntaxHighlighter>
559+
);
560+
}
561+
562+
// 无语言标记的代码块,以普通文本显示(不高亮)
563+
return (
564+
<pre className="bg-transparent text-slate-700 p-0 overflow-x-auto text-sm whitespace-pre font-sans leading-relaxed">
565+
{codeString}
566+
</pre>
567+
);
568+
},
569+
p: ({ children }) => (
570+
<p className="text-gray-700 leading-relaxed m-0 inline-block !whitespace-nowrap">
571+
{children}
572+
</p>
573+
),
574+
ul: ({ children }) => (
575+
<ul className="my-2 pl-5 list-disc overflow-x-auto !whitespace-nowrap">
576+
{children}
577+
</ul>
578+
),
579+
ol: ({ children }) => (
580+
<ol className="my-2 pl-5 list-decimal overflow-x-auto !whitespace-nowrap">
581+
{children}
582+
</ol>
583+
),
584+
li: ({ children }) => (
585+
<li className="!whitespace-nowrap">
586+
{children}
587+
</li>
588+
),
589+
br: () => <span> </span>,
590+
a: ({ href, children }) => (
591+
<a
592+
href={href}
593+
className="text-blue-600 hover:text-blue-800 hover:underline transition-colors"
594+
target="_blank"
595+
rel="noopener noreferrer"
596+
>
597+
{children}
598+
</a>
599+
),
600+
table: ({ children }) => (
601+
<div className="overflow-x-auto my-4 rounded border border-slate-200">
602+
<table className="min-w-full">{children}</table>
603+
</div>
604+
),
605+
thead: ({ children }) => (
606+
<thead className="bg-slate-50">{children}</thead>
607+
),
608+
th: ({ children }) => (
609+
<th className="px-4 py-2 text-left text-sm font-semibold text-slate-700 border-b border-slate-200">{children}</th>
610+
),
611+
td: ({ children }) => (
612+
<td className="px-4 py-2 text-sm text-slate-600 border-b border-slate-100">{children}</td>
613+
),
614+
blockquote: ({ children }) => (
615+
<blockquote className="border-l-4 border-blue-400 bg-slate-50 pl-4 py-2 my-4 text-slate-600 italic rounded-r">{children}</blockquote>
616+
),
617+
}}
618+
>
619+
{squashSoftLineBreaksOutsideFences(item.text ?? "")}
620+
</ReactMarkdown>
621+
</div>
622+
</div>
623+
</div>
624+
);
625+
})()}
626+
</div>
481627
) : (
482628
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
483629
{recallResults.map((item, idx) => (
484630
<Card key={idx} title={`${t("knowledgeBase.detail.recallTest.scoreLabel")}${item.score?.toFixed(4) ?? "-"}`}
485-
extra={<span style={{ fontSize: 12 }}>ID: {item.entity?.id ?? "-"}</span>}
631+
extra={<span style={{ fontSize: 12 }}>ID: {item.id ?? "-"}</span>}
486632
style={{ wordBreak: "break-all" }}
487633
>
488-
<div style={{ marginBottom: 8, fontWeight: 500 }}>{item.entity?.text ?? ""}</div>
634+
<div style={{ marginBottom: 8, fontWeight: 500 }}>{item.text ?? ""}</div>
489635
<div style={{ fontSize: 12, color: '#888' }}>
490-
{t("knowledgeBase.detail.recallTest.metadataLabel")} <pre style={{ whiteSpace: 'pre-wrap', wordBreak: 'break-all', margin: 0 }}>{item.entity?.metadata}</pre>
636+
{t("knowledgeBase.detail.recallTest.metadataLabel")} <pre style={{ whiteSpace: 'pre-wrap', wordBreak: 'break-all', margin: 0 }}>{JSON.stringify(item.metadata, null, 2)}</pre>
491637
</div>
492638
</Card>
493639
))}

frontend/src/pages/KnowledgeBase/knowledge-base.api.ts

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { get, post, put, del } from "@/utils/request";
2+
import type { UnifiedSearchResult } from "./knowledge-base.model";
23

34
// 获取知识库列表
45
export function queryKnowledgeBasesUsingPost(params: any) {
@@ -59,21 +60,17 @@ export function deleteKnowledgeBaseFileByIdUsingDelete(baseId: string, data: obj
5960
return (del as unknown as (url: string, data?: object | null) => Promise<unknown>)(`/api/knowledge-base/${baseId}/files`, data ?? null);
6061
}
6162

62-
export function fetchKnowledgeGraph(data: { knowledge_base_id: string; query: string }) {
63-
return post("/api/rag/query", data);
64-
}
65-
66-
// 检索知识库内容
63+
// 检索知识库内容(统一检索接口)
6764
export function retrieveKnowledgeBaseContent(data: {
6865
query: string;
6966
topK?: number;
7067
threshold?: number;
7168
knowledgeBaseIds: string[];
72-
}) {
69+
}): Promise<UnifiedSearchResult[]> {
7370
return post("/api/knowledge-base/retrieve", data);
7471
}
7572

76-
// 新增:获取知识库文件详情(分页的切片数据)
73+
// 获取知识库文件详情(分页的切片数据)
7774
export function queryKnowledgeBaseFileDetailUsingGet(
7875
knowledgeBaseId: string,
7976
ragFileId: string,
@@ -83,3 +80,10 @@ export function queryKnowledgeBaseFileDetailUsingGet(
8380
const size = params.size ?? 20;
8481
return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?page=${page}&page_size=${size}`);
8582
}
83+
84+
/**
 * Sends a query for a single knowledge base to the backend.
 *
 * Issues a POST to `/api/knowledge-base/query` with the payload as the request
 * body and returns whatever `post` returns.
 *
 * @param data Request payload: the target knowledge base id and the query string.
 */
export function queryKnowledgeBase(data: {
  knowledge_base_id: string;
  query: string;
}) {
  const endpoint = "/api/knowledge-base/query";
  return post(endpoint, data);
}

frontend/src/pages/KnowledgeBase/knowledge-base.model.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ export enum KBType {
1010
GRAPH = "GRAPH",
1111
}
1212

13+
/**
 * Flat shape of one search hit; `retrieveKnowledgeBaseContent` is typed to
 * resolve to an array of these.
 *
 * `resultType` tags the hit as either "vector" or "graph"; the two
 * `knowledgeBase*` fields carry the id and name of a knowledge base.
 * NOTE(review): the range/meaning of `score` and the keys inside `metadata`
 * are not visible here — confirm against the backend response.
 */
export interface UnifiedSearchResult {
14+
id: string;
15+
text: string;
16+
score: number;
17+
metadata: Record<string, any>;
18+
resultType: "vector" | "graph";
19+
knowledgeBaseId: string;
20+
knowledgeBaseName: string;
21+
}
22+
1323
export interface KnowledgeBaseItem {
1424
id: string;
1525
name: string;

runtime/datamate-python/app/core/exception/codes.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@ def __init__(self):
133133
RAG_EMBEDDING_FAILED: Final = ErrorCode(
134134
"rag.0013", "Embedding generation failed", 500
135135
)
136+
RAG_UNSUPPORTED_TYPE: Final = ErrorCode(
137+
"rag.0014", "Unsupported RAG type", 400
138+
)
139+
RAG_INVALID_REQUEST: Final = ErrorCode(
140+
"rag.0015", "Invalid request", 400
141+
)
136142

137143
# ========== 配比模块 ==========
138144
RATIO_TASK_NOT_FOUND: Final = ErrorCode("ratio.0001", "Ratio task not found", 404)

runtime/datamate-python/app/module/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from .generation.interface import router as generation_router
77
from .evaluation.interface import router as evaluation_router
88
from .collection.interface import router as collection_route
9-
from .rag.interface.rag_interface import router as rag_router
109
from .operator.interface import operator_router
1110
from .operator.interface import category_router
1211
from .cleaning.interface import router as cleaning_router
@@ -22,11 +21,9 @@
2221
router.include_router(generation_router)
2322
router.include_router(evaluation_router)
2423
router.include_router(collection_route)
25-
router.include_router(rag_router)
2624
router.include_router(operator_router)
2725
router.include_router(category_router)
2826
router.include_router(cleaning_router)
29-
3027
router.include_router(knowledge_base_router)
3128

3229
__all__ = ["router"]

runtime/datamate-python/app/module/rag/interface/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
集中导出所有 API 路由
55
"""
66
from .knowledge_base import router as knowledge_base_router
7-
from .rag_interface import router as graph_rag_router
87

98
__all__ = [
109
"knowledge_base_router",
11-
"graph_rag_router",
1210
]

0 commit comments

Comments
 (0)