Skip to content

Commit ae4f846

Browse files
committed
feat: ✨ 添加基于 ES 的向量数据库的实现
1 parent e558d8b commit ae4f846

14 files changed

Lines changed: 199 additions & 49 deletions
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package com.stephen.cloud.ai.config;
2+
3+
import lombok.Data;
4+
import org.springframework.boot.context.properties.ConfigurationProperties;
5+
import org.springframework.stereotype.Component;
6+
7+
/**
 * Externalized settings for document ingestion, bound from configuration keys
 * under the {@code document.processing} prefix.
 *
 * <p>Each field initializer is the default used when the corresponding key is
 * not configured. Lombok {@code @Data} generates the getters/setters that the
 * binder and callers rely on.
 */
@Data
8+
@Component
9+
@ConfigurationProperties(prefix = "document.processing")
10+
public class DocumentProcessingProperties {
11+
12+
// Passed as the first argument of the TokenTextSplitter constructor in VectorStoreConfig.
private int chunkSize = 800;
13+
14+
// Passed as the second argument of the TokenTextSplitter constructor in VectorStoreConfig.
// NOTE(review): confirm that argument really means "overlap" for the splitter in use.
private int overlapSize = 100;
15+
16+
// Upper bound compared against MultipartFile#getSize() on upload; 10485760 = 10 * 1024 * 1024.
private long maxFileSize = 10485760L;
17+
18+
// Base directory for stored uploads; uploadDocument resolves a per-knowledge-base subdirectory under it.
private String uploadPath = "uploads/knowledge";
19+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package com.stephen.cloud.ai.config;
2+
3+
import lombok.Data;
4+
import org.springframework.boot.context.properties.ConfigurationProperties;
5+
import org.springframework.stereotype.Component;
6+
7+
/**
 * Externalized retrieval settings, bound from configuration keys under the
 * {@code rag.retrieval} prefix.
 *
 * <p>Each field initializer is the default used when the corresponding key is
 * not configured. The consumer of these values is not visible here; presumably
 * they feed the {@code topK} / {@code similarityThreshold} parameters of
 * {@code VectorStoreService.similaritySearch} — TODO confirm.
 */
@Data
8+
@Component
9+
@ConfigurationProperties(prefix = "rag.retrieval")
10+
public class RagRetrievalProperties {
11+
12+
// Default number of results to retrieve.
private int topK = 5;
13+
14+
// Default similarity cut-off. Declared as a boxed Double, so a setter could assign null
// — NOTE(review): confirm callers tolerate that or never set it to null.
private Double similarityThreshold = 0.7D;
15+
}

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/config/VectorStoreConfig.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,22 @@
33
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
44
import org.springframework.context.annotation.Bean;
55
import org.springframework.context.annotation.Configuration;
6+
import jakarta.annotation.Resource;
67

78
/**
 * Spring configuration that exposes the {@link TokenTextSplitter} bean used to
 * chunk documents.
 *
 * <p>This diff replaces the hard-coded splitter arguments {@code (800, 100, ...)}
 * with values read from {@link DocumentProcessingProperties}; the remaining
 * three arguments stay as literals.
 */
@Configuration
89
public class VectorStoreConfig {
910

11+
// Source of the configurable chunkSize / overlapSize values.
@Resource
12+
private DocumentProcessingProperties documentProcessingProperties;
13+
1014
/**
 * Builds the token-based text splitter from the configured chunk settings.
 *
 * <p>NOTE(review): in Spring AI the five-argument constructor is documented as
 * {@code (chunkSize, minChunkSizeChars, minChunkLengthToEmbed, maxNumChunks, keepSeparator)}.
 * If that is the constructor resolved here, {@code overlapSize} is being used as
 * {@code minChunkSizeChars} rather than as a chunk overlap — confirm this is intended.
 *
 * @return the splitter bean
 */
@Bean
1115
public TokenTextSplitter tokenTextSplitter() {
12-
return new TokenTextSplitter(800, 100, 5, 10000, true);
16+
return new TokenTextSplitter(
17+
documentProcessingProperties.getChunkSize(),
18+
documentProcessingProperties.getOverlapSize(),
19+
5,
20+
10000,
21+
true
22+
);
1323
}
1424
}

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/controller/DocumentController.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,11 @@ public BaseResponse<Boolean> deleteDocument(@RequestBody DeleteRequest deleteReq
4040
if (deleteRequest == null || deleteRequest.getId() <= 0) {
4141
throw new BusinessException(ErrorCode.PARAMS_ERROR);
4242
}
43-
Long id = deleteRequest.getId();
44-
Document oldDocument = documentService.getById(id);
45-
ThrowUtils.throwIf(oldDocument == null, ErrorCode.NOT_FOUND_ERROR);
46-
Long userId = SecurityUtils.getLoginUserId();
47-
ThrowUtils.throwIf(!oldDocument.getUserId().equals(userId) && !SecurityUtils.isAdmin(), ErrorCode.NO_AUTH_ERROR);
48-
return ResultUtils.success(documentService.removeById(id));
43+
return ResultUtils.success(documentService.deleteDocumentById(
44+
deleteRequest.getId(),
45+
SecurityUtils.getLoginUserId(),
46+
SecurityUtils.isAdmin()
47+
));
4948
}
5049

5150
@GetMapping("/get/vo")

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/controller/KnowledgeBaseController.java

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
package com.stephen.cloud.ai.controller;
22

33
import cn.dev33.satoken.annotation.SaCheckRole;
4-
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
54
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
65
import com.stephen.cloud.ai.convert.KnowledgeBaseConvert;
76
import com.stephen.cloud.ai.model.entity.KnowledgeBase;
8-
import com.stephen.cloud.ai.service.DocumentService;
97
import com.stephen.cloud.ai.service.KnowledgeBaseService;
10-
import com.stephen.cloud.api.ai.model.dto.document.DocumentQueryRequest;
118
import com.stephen.cloud.api.ai.model.dto.knowledgebase.KnowledgeBaseAddRequest;
129
import com.stephen.cloud.api.ai.model.dto.knowledgebase.KnowledgeBaseEditRequest;
1310
import com.stephen.cloud.api.ai.model.dto.knowledgebase.KnowledgeBaseQueryRequest;
@@ -32,9 +29,6 @@ public class KnowledgeBaseController {
3229
@Resource
3330
private KnowledgeBaseService knowledgeBaseService;
3431

35-
@Resource
36-
private DocumentService documentService;
37-
3832
@PostMapping("/add")
3933
@Operation(summary = "创建知识库")
4034
@OperationLog(module = "知识库管理", action = "创建知识库")
@@ -56,15 +50,11 @@ public BaseResponse<Boolean> deleteKnowledgeBase(@RequestBody DeleteRequest dele
5650
if (deleteRequest == null || deleteRequest.getId() <= 0) {
5751
throw new BusinessException(ErrorCode.PARAMS_ERROR);
5852
}
59-
Long id = deleteRequest.getId();
60-
KnowledgeBase oldKnowledgeBase = knowledgeBaseService.getById(id);
61-
ThrowUtils.throwIf(oldKnowledgeBase == null, ErrorCode.NOT_FOUND_ERROR);
62-
Long userId = SecurityUtils.getLoginUserId();
63-
ThrowUtils.throwIf(!oldKnowledgeBase.getUserId().equals(userId) && !SecurityUtils.isAdmin(), ErrorCode.NO_AUTH_ERROR);
64-
documentService.remove(new LambdaQueryWrapper<com.stephen.cloud.ai.model.entity.Document>()
65-
.eq(com.stephen.cloud.ai.model.entity.Document::getKnowledgeBaseId, id));
66-
boolean result = knowledgeBaseService.removeById(id);
67-
return ResultUtils.success(result);
53+
return ResultUtils.success(knowledgeBaseService.deleteKnowledgeBaseById(
54+
deleteRequest.getId(),
55+
SecurityUtils.getLoginUserId(),
56+
SecurityUtils.isAdmin()
57+
));
6858
}
6959

7060
@PostMapping("/update")

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/knowledge/etl/DocumentETLPipeline.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import java.util.List;
1313
import java.util.Map;
14+
import java.util.UUID;
1415

1516
@Component
1617
public class DocumentETLPipeline {
@@ -35,6 +36,12 @@ public int process(String filePath, String fileExtension, Map<String, Object> me
3536
document.getMetadata().putAll(metadata);
3637
}
3738
List<Document> chunks = tokenTextSplitter.apply(documents);
39+
for (int i = 0; i < chunks.size(); i++) {
40+
Document chunk = chunks.get(i);
41+
chunk.getMetadata().put("chunkIndex", i);
42+
String chunkId = metadata.get("documentId") + "_" + i + "_" + UUID.randomUUID();
43+
chunk.getMetadata().put("chunkId", chunkId);
44+
}
3845
vectorStore.add(chunks);
3946
return chunks.size();
4047
}

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/service/DocumentService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ public interface DocumentService extends IService<Document> {
2222
Page<DocumentVO> getDocumentVOPage(Page<Document> page, HttpServletRequest request);
2323

2424
void sendDocumentProcessMessage(Long documentId);
25+
26+
boolean deleteDocumentById(Long id, Long loginUserId, boolean isAdmin);
2527
}

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/service/KnowledgeBaseService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,6 @@ public interface KnowledgeBaseService extends IService<KnowledgeBase> {
1919
Page<KnowledgeBaseVO> getKnowledgeBaseVOPage(Page<KnowledgeBase> page, HttpServletRequest request);
2020

2121
boolean isNameUnique(String name, Long excludeId);
22+
23+
boolean deleteKnowledgeBaseById(Long id, Long loginUserId, boolean isAdmin);
2224
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package com.stephen.cloud.ai.service;
2+
3+
import org.springframework.ai.document.Document;
4+
5+
import java.util.List;
6+
7+
/**
 * Abstraction over the vector store used for retrieval and for cleaning up
 * stored chunks.
 *
 * <p>No implementation is visible in this view; the notes below describe only
 * what the signatures show, and anything further is marked for confirmation.
 */
public interface VectorStoreService {
8+
9+
/**
 * Searches for documents similar to {@code query}.
 *
 * @param query               the text to search for
 * @param knowledgeBaseId     presumably restricts results to one knowledge base — TODO confirm
 * @param topK                maximum number of results; boxed, so null handling is implementation-defined
 * @param similarityThreshold minimum similarity; boxed, so null handling is implementation-defined
 * @return the matching Spring AI {@link Document}s
 */
List<Document> similaritySearch(String query, Long knowledgeBaseId, Integer topK, Double similarityThreshold);
10+
11+
/**
 * Deletes vector-store entries associated with the given knowledge base.
 *
 * @param knowledgeBaseId the knowledge base identifier
 */
void deleteByKnowledgeBaseId(Long knowledgeBaseId);
12+
13+
/**
 * Deletes vector-store entries associated with the given document.
 *
 * @param documentId the document identifier
 */
void deleteByDocumentId(Long documentId);
14+
}

algorithm-service/algorithm-ai-service/src/main/java/com/stephen/cloud/ai/service/impl/DocumentServiceImpl.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
55
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
66
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
7+
import com.stephen.cloud.ai.config.DocumentProcessingProperties;
78
import com.stephen.cloud.ai.convert.DocumentConvert;
89
import com.stephen.cloud.ai.mapper.DocumentMapper;
910
import com.stephen.cloud.ai.model.entity.Document;
1011
import com.stephen.cloud.ai.mq.DocumentProcessProducer;
1112
import com.stephen.cloud.ai.mq.model.DocumentProcessMessage;
1213
import com.stephen.cloud.ai.service.DocumentService;
14+
import com.stephen.cloud.ai.service.VectorStoreService;
1315
import com.stephen.cloud.api.ai.model.dto.document.DocumentQueryRequest;
1416
import com.stephen.cloud.api.ai.model.vo.DocumentVO;
1517
import com.stephen.cloud.api.user.client.UserFeignClient;
@@ -22,8 +24,8 @@
2224
import jakarta.servlet.http.HttpServletRequest;
2325
import org.apache.commons.io.FilenameUtils;
2426
import org.apache.commons.lang3.StringUtils;
25-
import org.springframework.beans.factory.annotation.Value;
2627
import org.springframework.stereotype.Service;
28+
import org.springframework.transaction.annotation.Transactional;
2729
import org.springframework.web.multipart.MultipartFile;
2830

2931
import java.io.File;
@@ -38,29 +40,29 @@ public class DocumentServiceImpl extends ServiceImpl<DocumentMapper, Document> i
3840

3941
private static final Set<String> SUPPORTED_EXTENSIONS = Set.of("pdf", "doc", "docx", "md", "txt", "ppt", "pptx", "html");
4042

41-
@Value("${document.processing.max-file-size:10485760}")
42-
private long maxFileSize;
43-
44-
@Value("${document.processing.upload-path:uploads/knowledge}")
45-
private String uploadPath;
43+
@Resource
44+
private DocumentProcessingProperties documentProcessingProperties;
4645

4746
@Resource
4847
private DocumentProcessProducer documentProcessProducer;
4948

5049
@Resource
5150
private UserFeignClient userFeignClient;
5251

52+
@Resource
53+
private VectorStoreService vectorStoreService;
54+
5355
@Override
5456
public Long uploadDocument(MultipartFile file, Long knowledgeBaseId, Long userId) {
5557
ThrowUtils.throwIf(file == null || file.isEmpty(), ErrorCode.PARAMS_ERROR, "文件不能为空");
5658
ThrowUtils.throwIf(knowledgeBaseId == null || knowledgeBaseId <= 0, ErrorCode.PARAMS_ERROR);
57-
ThrowUtils.throwIf(file.getSize() > maxFileSize, ErrorCode.PARAMS_ERROR, "文件大小超过限制");
59+
ThrowUtils.throwIf(file.getSize() > documentProcessingProperties.getMaxFileSize(), ErrorCode.PARAMS_ERROR, "文件大小超过限制");
5860
String originalFilename = file.getOriginalFilename();
5961
String extension = StringUtils.lowerCase(FilenameUtils.getExtension(originalFilename));
6062
ThrowUtils.throwIf(StringUtils.isBlank(extension) || !SUPPORTED_EXTENSIONS.contains(extension),
6163
ErrorCode.PARAMS_ERROR, "不支持的文件格式");
6264
try {
63-
Path saveDir = Path.of(uploadPath, String.valueOf(knowledgeBaseId));
65+
Path saveDir = Path.of(documentProcessingProperties.getUploadPath(), String.valueOf(knowledgeBaseId));
6466
Files.createDirectories(saveDir);
6567
String saveName = System.currentTimeMillis() + "_" + UUID.randomUUID() + "." + extension;
6668
Path savePath = saveDir.resolve(saveName);
@@ -171,4 +173,15 @@ public void sendDocumentProcessMessage(Long documentId) {
171173
message.setFileExtension(document.getFileExtension());
172174
documentProcessProducer.sendMessage(message);
173175
}
176+
177+
/**
 * Deletes a document after validating the id, its existence, and the caller's
 * permission, removing its vector-store entries before the database row.
 *
 * <p>NOTE(review): the vector-store deletion runs before {@code removeById}.
 * Whether that call participates in this transaction is not visible here; if it
 * does not, a failure in {@code removeById} would roll back the database but
 * leave the vectors already deleted — confirm the intended ordering.
 *
 * <p>NOTE(review): this method uses {@code Objects.equals}; the import hunk shown
 * for this file does not include {@code java.util.Objects} — confirm it is imported.
 *
 * @param id          the document id; must be non-null and positive
 * @param loginUserId the id of the current user, compared with the document owner
 * @param isAdmin     whether the current user may bypass the ownership check
 * @return the result of {@code removeById(id)}
 */
@Override
178+
@Transactional(rollbackFor = Exception.class)
179+
public boolean deleteDocumentById(Long id, Long loginUserId, boolean isAdmin) {
180+
// Reject missing or non-positive ids before touching the database.
ThrowUtils.throwIf(id == null || id <= 0, ErrorCode.PARAMS_ERROR);
181+
Document oldDocument = this.getById(id);
182+
ThrowUtils.throwIf(oldDocument == null, ErrorCode.NOT_FOUND_ERROR);
183+
// Only the owner or an admin may delete; Objects.equals tolerates a null owner/user id.
ThrowUtils.throwIf(!Objects.equals(oldDocument.getUserId(), loginUserId) && !isAdmin, ErrorCode.NO_AUTH_ERROR);
184+
// Remove the document's vectors first, then the database record.
vectorStoreService.deleteByDocumentId(id);
185+
return this.removeById(id);
186+
}
174187
}

0 commit comments

Comments
 (0)