Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions backend/apps/chat/api/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,13 @@ async def export_excel(session: SessionDep, chat_record_id: int, trans: Trans):

def inner():

data, _fields_list, col_formats = LLMService.format_pd_data(fields, _data + _predict_data)
data_list = LLMService.convert_large_numbers_in_object_array(_data + _predict_data)

df = pd.DataFrame(data, columns=_fields_list)
md_data, _fields_list = LLMService.convert_object_array_for_pandas(fields, data_list)

# data, _fields_list, col_formats = LLMService.format_pd_data(fields, _data + _predict_data)

df = pd.DataFrame(md_data, columns=_fields_list)

buffer = io.BytesIO()

Expand All @@ -256,14 +260,14 @@ def inner():
df.to_excel(writer, sheet_name='Sheet1', index=False)

# 获取 xlsxwriter 的工作簿和工作表对象
workbook = writer.book
worksheet = writer.sheets['Sheet1']

for col_idx, fmt_type in col_formats.items():
if fmt_type == 'text':
worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '@'}))
elif fmt_type == 'number':
worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '0'}))
# workbook = writer.book
# worksheet = writer.sheets['Sheet1']
#
# for col_idx, fmt_type in col_formats.items():
# if fmt_type == 'text':
# worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '@'}))
# elif fmt_type == 'number':
# worksheet.set_column(col_idx, col_idx, None, workbook.add_format({'num_format': '0'}))

buffer.seek(0)
return io.BytesIO(buffer.getvalue())
Expand Down
82 changes: 75 additions & 7 deletions backend/apps/chat/task/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,10 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
return

result = self.execute_sql(sql=real_execute_sql)

_data = self.convert_large_numbers_in_object_array(result.get('data'))
result["data"] = _data

self.save_sql_data(session=_session, data_obj=result)
if in_chat:
yield 'data:' + orjson.dumps({'content': 'execute-success', 'type': 'sql-data'}).decode() + '\n\n'
Expand All @@ -1053,12 +1057,14 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
for field in result.get('fields'):
_column_list.append(AxisObj(name=field, value=field))

data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
md_data, _fields_list = self.convert_object_array_for_pandas(_column_list, result.get('data'))

# data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))

if not data or not _fields_list:
if not _data or not _fields_list:
yield 'The SQL execution result is empty.\n\n'
else:
df = pd.DataFrame(data, columns=_fields_list)
df = pd.DataFrame(_data, columns=_fields_list)
df_safe = self.safe_convert_to_string(df)
markdown_table = df_safe.to_markdown(index=False)
yield markdown_table + '\n\n'
Expand Down Expand Up @@ -1091,7 +1097,6 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
{'content': orjson.dumps(chart).decode(), 'type': 'chart'}).decode() + '\n\n'
else:
if stream:
data = []
_fields = {}
if chart.get('columns'):
for _column in chart.get('columns'):
Expand All @@ -1110,12 +1115,14 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
_column_list.append(
AxisObj(name=field if not _fields.get(field) else _fields.get(field), value=field))

data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
md_data, _fields_list = self.convert_object_array_for_pandas(_column_list, result.get('data'))

if not data or not _fields_list:
# data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))

if not md_data or not _fields_list:
yield 'The SQL execution result is empty.\n\n'
else:
df = pd.DataFrame(data, columns=_fields_list)
df = pd.DataFrame(md_data, columns=_fields_list)
df_safe = self.safe_convert_to_string(df)
markdown_table = df_safe.to_markdown(index=False)
yield markdown_table + '\n\n'
Expand Down Expand Up @@ -1183,6 +1190,67 @@ def safe_convert_to_string(df):

return df_copy

@staticmethod
def convert_large_numbers_in_object_array(obj_array, int_threshold=1e15, float_threshold=1e10):
    """Return a copy of ``obj_array`` with extreme numbers rendered as strings.

    Each dict in ``obj_array`` is rebuilt (the input is not mutated):

    * ``int`` values with ``abs(value) >= int_threshold`` become ``str(value)``.
    * ``float`` values with ``abs(value) >= float_threshold`` or
      ``abs(value) < 1e-6`` become a plain decimal string without
      scientific notation.
    * Nested dicts, and dicts inside list values, are processed the same way.
    * Everything else (including non-dict items of ``obj_array`` and
      non-dict items of nested lists) is passed through unchanged.

    Args:
        obj_array: Iterable of row objects (normally dicts). ``None`` is
            treated as an empty result.
        int_threshold: Magnitude at or above which an int is stringified.
        float_threshold: Magnitude at or above which a float is stringified.

    Returns:
        A new list with one processed entry per item of ``obj_array``.
    """
    # Function-scope imports keep the block self-contained.
    import math
    from decimal import Decimal

    if obj_array is None:
        # A missing result set is handled like an empty one instead of
        # raising TypeError when iterating None.
        return []

    def format_float_without_scientific(value):
        """Format a float as a plain decimal string (no exponent)."""
        if value == 0:
            return "0"
        if not math.isfinite(value):
            # inf / -inf have no decimal expansion; keep Python's spelling.
            return str(value)
        # repr() is the shortest string that round-trips the float, so going
        # through Decimal keeps every significant digit. A fixed "%.15f"
        # would collapse values such as 1e-20 to "0" (or "-0").
        formatted = format(Decimal(repr(float(value))), "f")
        if "." in formatted:
            # Only strip zeros after a decimal point; integers such as
            # "10000000000" must keep their trailing zeros.
            formatted = formatted.rstrip("0").rstrip(".")
        return formatted

    def process_object(obj):
        """Process a single object, converting its out-of-range numbers."""
        if not isinstance(obj, dict):
            return obj

        processed_obj = {}
        for key, value in obj.items():
            if isinstance(value, int):
                # Only very large integers are converted.
                processed_obj[key] = str(value) if abs(value) >= int_threshold else value
            elif isinstance(value, float):
                # Only very large or very small floats are converted.
                if abs(value) >= float_threshold or abs(value) < 1e-6:
                    processed_obj[key] = format_float_without_scientific(value)
                else:
                    processed_obj[key] = value
            elif isinstance(value, dict):
                # Nested object.
                processed_obj[key] = process_object(value)
            elif isinstance(value, list):
                # Array inside the object.
                processed_obj[key] = [process_item(item) for item in value]
            else:
                processed_obj[key] = value
        return processed_obj

    def process_item(item):
        """Process one array item; only dict items are converted."""
        if isinstance(item, dict):
            return process_object(item)
        return item

    return [process_item(obj) for obj in obj_array]

@staticmethod
def convert_object_array_for_pandas(column_list: list, data_list: list) -> tuple:
    """Turn a list of row dicts into row lists ordered by ``column_list``.

    Args:
        column_list: Column descriptors; each must expose ``name`` (used as
            the output column header) and ``value`` (the key looked up in
            every row). ``None`` is treated as no columns.
        data_list: Row dicts. ``None`` is treated as no rows.

    Returns:
        A ``(md_data, _fields_list)`` tuple: ``md_data`` is a list of rows,
        each a list of cell values in column order (``None`` where the row
        has no such key); ``_fields_list`` is the list of column names.
    """
    columns = column_list or []
    _fields_list = [field.name for field in columns]

    # Resolve the lookup keys once instead of on every row.
    keys = [field.value for field in columns]
    md_data = [[inner_data.get(key) for key in keys] for inner_data in (data_list or [])]
    return md_data, _fields_list

@staticmethod
def format_pd_data(column_list: list, data_list: list, col_formats: dict = None):
# 预处理数据并记录每列的格式类型
Expand Down