Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 116 additions & 2 deletions backend/apps/terminology/api/terminology.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import asyncio
import hashlib
import io
import os
import uuid
from http.client import HTTPException
from typing import Optional

import pandas as pd
from fastapi import APIRouter, Query
from fastapi import APIRouter, File, UploadFile, Query
from fastapi.responses import StreamingResponse

from apps.chat.models.chat_model import AxisObj
from apps.terminology.curd.terminology import page_terminology, create_terminology, update_terminology, \
delete_terminology, enable_terminology, get_all_terminology
delete_terminology, enable_terminology, get_all_terminology, batch_create_terminology
from apps.terminology.models.terminology_model import TerminologyInfo
from common.core.config import settings
from common.core.deps import SessionDep, CurrentUser, Trans
from common.utils.data_format import DataFormat

Expand Down Expand Up @@ -89,3 +94,112 @@ def inner():

result = await asyncio.to_thread(inner)
return StreamingResponse(result, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")


# Directory where uploaded workbooks (and generated error reports) are stored.
path = settings.EXCEL_PATH

# NOTE(review): mid-file imports — these should move to the top of the module
# with the other imports.
from sqlalchemy.orm import sessionmaker, scoped_session
from common.core.db import engine
from sqlmodel import Session

# Thread-local Session factory: upload processing runs via asyncio.to_thread,
# so each worker thread gets its own SQLModel Session bound to the shared engine.
session_maker = scoped_session(sessionmaker(bind=engine, class_=Session))


@router.post("/uploadExcel")
async def upload_excel(trans: Trans, current_user: CurrentUser, file: UploadFile = File(...)):
    """Import terminology entries from an uploaded Excel workbook.

    Saves the upload under a hashed name, parses every sheet (first five
    columns: word, synonyms, description, datasource names, all-datasources
    flag), bulk-inserts the rows, and, when some rows fail, writes an error
    workbook next to the upload.

    Returns a dict with success/failed/duplicate/original counts and the
    error workbook filename (or ``None`` if every row succeeded).
    """
    # NOTE(review): the module imports HTTPException from http.client, which
    # FastAPI does NOT convert into an HTTP error response (the request would
    # surface as a 500). Import the FastAPI one locally so the 400 is real.
    from fastapi import HTTPException

    ALLOWED_EXTENSIONS = {".xlsx", ".xls"}
    # splitext handles dotted names ("my.terms.xlsx") correctly, unlike
    # the previous split('.')[0]/[1] which mangled both stem and extension.
    stem, ext = os.path.splitext(os.path.basename(file.filename or ""))
    if ext.lower() not in ALLOWED_EXTENSIONS:
        raise HTTPException(400, "Only support .xlsx/.xls")

    os.makedirs(path, exist_ok=True)
    # Random suffix avoids collisions between uploads sharing a filename.
    base_filename = f"{stem}_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()[:10]}"
    filename = f"{base_filename}{ext}"
    save_path = os.path.join(path, filename)
    with open(save_path, "wb") as f:
        f.write(await file.read())

    oid = current_user.oid

    # Only the first five columns are meaningful; anything beyond is ignored.
    use_cols = [0, 1, 2, 3, 4]

    def inner():
        session = session_maker()
        try:
            sheet_names = pd.ExcelFile(save_path).sheet_names

            import_data = []

            for sheet_name in sheet_names:
                df = pd.read_excel(
                    save_path,
                    sheet_name=sheet_name,
                    engine='calamine',
                    header=0,
                    usecols=use_cols,
                    dtype=str
                ).fillna("")

                for _, row in df.iterrows():
                    # fillna("") above guarantees plain strings, so emptiness —
                    # not isnull() (always False after fillna) — marks blank rows.
                    values = [str(v).strip() for v in row.iloc[:5]]
                    if not any(values):
                        continue

                    word_s, other_s, desc_s, ds_s, all_ds_s = values
                    word = word_s or None
                    other_words = [w.strip() for w in other_s.split(',')] if other_s else []
                    description = desc_s or None
                    datasource_names = [d.strip() for d in ds_s.split(',')] if ds_s else []
                    all_datasource = all_ds_s.lower() in ('y', 'yes', 'true')
                    specific_ds = not all_datasource

                    import_data.append(TerminologyInfo(word=word, description=description, other_words=other_words,
                                                       datasource_names=datasource_names, specific_ds=specific_ds))

            res = batch_create_terminology(session, import_data, oid, trans)

            failed_records = res['failed_records']

            error_excel_filename = None

            if failed_records:
                data_list = []
                for obj in failed_records:
                    _data = {
                        "word": obj['data'].word,
                        "other_words": ', '.join(obj['data'].other_words) if obj['data'].other_words else '',
                        "description": obj['data'].description,
                        "all_data_sources": 'N' if obj['data'].specific_ds else 'Y',
                        "datasource": ', '.join(obj['data'].datasource_names) if obj['data'].datasource_names and obj[
                            'data'].specific_ds else '',
                        "errors": obj['errors']
                    }
                    data_list.append(_data)

                # Localized column headers for the error report.
                fields = [
                    AxisObj(name=trans('i18n_terminology.term_name'), value='word'),
                    AxisObj(name=trans('i18n_terminology.synonyms'), value='other_words'),
                    AxisObj(name=trans('i18n_terminology.term_description'), value='description'),
                    AxisObj(name=trans('i18n_terminology.effective_data_sources'), value='datasource'),
                    AxisObj(name=trans('i18n_terminology.all_data_sources'), value='all_data_sources'),
                    AxisObj(name=trans('i18n_data_training.error_info'), value='errors'),
                ]

                md_data, _fields_list = DataFormat.convert_object_array_for_pandas(fields, data_list)

                df = pd.DataFrame(md_data, columns=_fields_list)
                error_excel_filename = f"{base_filename}_error.xlsx"
                save_error_path = os.path.join(path, error_excel_filename)
                # Persist the failed rows so the client can download and fix them.
                df.to_excel(save_error_path, index=False)

            return {
                'success_count': res['success_count'],
                'failed_count': len(failed_records),
                'duplicate_count': res['duplicate_count'],
                'original_count': res['original_count'],
                'error_excel_filename': error_excel_filename,
            }
        finally:
            # Release the thread-local Session; scoped_session would otherwise
            # keep it (and its connection) alive for the worker thread's lifetime.
            session.close()
            session_maker.remove()

    return await asyncio.to_thread(inner)
Loading