Skip to content

Commit 07556ec

Browse files
committed
refactor: ♻️ 对RAG的ETL流程进行再次优化
1 parent eda4f86 commit 07556ec

36 files changed

Lines changed: 984 additions & 465 deletions

.gitattributes

Lines changed: 0 additions & 2 deletions
This file was deleted.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,3 +87,6 @@ nacos-config/common-secret-prod.properties
8787
.env
8888
.env.*
8989
!.env.example
90+
91+
# Data
92+
data/

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/ai/model/dto/AiChatRecordDTO.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,4 +80,10 @@ public class AiChatRecordDTO implements Serializable {
8080
*/
8181
@Schema(description = "生成消耗 token")
8282
private Integer completionTokens;
83+
84+
/**
85+
* 检索元数据(来源分片、分数等)
86+
*/
87+
@Schema(description = "检索元数据")
88+
private String retrievalMetadata;
8389
}

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/chunk/DocumentChunkAddRequest.java

Lines changed: 0 additions & 48 deletions
This file was deleted.

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/chunk/DocumentChunkQueryRequest.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,5 +38,17 @@ public class DocumentChunkQueryRequest extends PageRequest implements Serializab
3838
*/
3939
@Schema(description = "分片序号")
4040
private Integer chunkIndex;
41+
42+
/**
43+
* 标签过滤 (支持模糊匹配)
44+
*/
45+
@Schema(description = "标签过滤")
46+
private String tags;
47+
48+
/**
49+
* 是否包含代码
50+
*/
51+
@Schema(description = "是否包含代码")
52+
private Boolean hasCode;
4153
}
4254

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/chunk/DocumentChunkUpdateRequest.java

Lines changed: 0 additions & 47 deletions
This file was deleted.

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/knowledgebase/KnowledgeBaseAddRequest.java

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,5 +29,29 @@ public class KnowledgeBaseAddRequest implements Serializable {
2929
*/
3030
@Schema(description = "知识库描述")
3131
private String description;
32+
33+
/**
34+
* 向量相似度模式 (cosine/euclidean/dot_product)
35+
*/
36+
@Schema(description = "向量相似度模式")
37+
private String similarityMode;
38+
39+
/**
40+
* 分片策略 (token/recursive/semantic)
41+
*/
42+
@Schema(description = "分片策略")
43+
private String chunkStrategy;
44+
45+
/**
46+
* 分片大小
47+
*/
48+
@Schema(description = "分片大小")
49+
private Integer chunkSize;
50+
51+
/**
52+
* 分片重叠
53+
*/
54+
@Schema(description = "分片重叠")
55+
private Integer chunkOverlap;
3256
}
3357

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/knowledgebase/KnowledgeBaseUpdateRequest.java

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,5 +44,29 @@ public class KnowledgeBaseUpdateRequest implements Serializable {
4444
*/
4545
@Schema(description = "状态 (0-启用, 1-禁用等)")
4646
private Integer status;
47+
48+
/**
49+
* 向量相似度模式
50+
*/
51+
@Schema(description = "向量相似度模式")
52+
private String similarityMode;
53+
54+
/**
55+
* 分片策略
56+
*/
57+
@Schema(description = "分片策略")
58+
private String chunkStrategy;
59+
60+
/**
61+
* 分片大小
62+
*/
63+
@Schema(description = "分片大小")
64+
private Integer chunkSize;
65+
66+
/**
67+
* 分片重叠
68+
*/
69+
@Schema(description = "分片重叠")
70+
private Integer chunkOverlap;
4771
}
4872

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/knowledgedocument/KnowledgeDocumentAddRequest.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,4 +45,16 @@ public class KnowledgeDocumentAddRequest implements Serializable {
4545
*/
4646
@Schema(description = "文件大小 (字节)")
4747
private Long sizeBytes;
48+
49+
/**
50+
* 文档标签 (逗号分隔,如:算法,动态规划,LeetCode)
51+
*/
52+
@Schema(description = "文档标签")
53+
private String tags;
54+
55+
/**
56+
* 是否包含代码
57+
*/
58+
@Schema(description = "是否包含代码")
59+
private Boolean hasCode;
4860
}

algorithm-api/algorithm-api-ai/src/main/java/com/stephen/cloud/api/knowledge/model/dto/knowledgedocument/KnowledgeDocumentUpdateRequest.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,4 +45,16 @@ public class KnowledgeDocumentUpdateRequest implements Serializable {
4545
*/
4646
@Schema(description = "错误信息")
4747
private String errorMsg;
48+
49+
/**
50+
* 文档标签 (逗号分隔)
51+
*/
52+
@Schema(description = "文档标签")
53+
private String tags;
54+
55+
/**
56+
* 是否包含代码
57+
*/
58+
@Schema(description = "是否包含代码")
59+
private Boolean hasCode;
4860
}

0 commit comments

Comments
 (0)