Skip to content

Commit 8b282dc

Browse files
committed
refactor(export_registration): refactor export registration data usecase
1 parent 4999cd8 commit 8b282dc

File tree

4 files changed

+146
-90
lines changed

4 files changed

+146
-90
lines changed

.isort.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
[settings]
2-
known_third_party = PIL,aws,boto3,botocore,constants,controller,dotenv,external_gateway,fastapi,fastapi_cloudauth,lambda_decorators,lambdawarmer,mangum,model,openpyxl,pandas,pydantic,pynamodb,pytz,repository,requests,starlette,typing_extensions,ulid,usecase,utils
2+
known_third_party = PIL,aws,boto3,botocore,constants,controller,dotenv,external_gateway,fastapi,fastapi_cloudauth,httpx,lambda_decorators,lambdawarmer,mangum,model,openpyxl,pandas,pydantic,pynamodb,pytz,repository,requests,starlette,typing_extensions,ulid,usecase,utils

backend/model/pycon_registrations/pycon_registration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,4 @@ class PyconExportData(BaseModel):
139139
contactNumber: str = Field(..., title='Contact Number')
140140
organization: str = Field(..., title='Affiliated Company or Organization')
141141
ticketType: TicketTypes = Field(title='Ticket Type')
142-
idURL: Optional[HttpUrl] = Field(None, title='ID URL')
142+
imageIdUrl: Optional[HttpUrl] = Field(None, title='ID URL')

backend/scripts/export_registrations_to_excel.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,14 @@
1313
args = parser.parse_args()
1414
load_dotenv(dotenv_path=args.env_file)
1515

16+
import asyncio
17+
1618
if __name__ == '__main__':
1719
from usecase.export_data_usecase import ExportDataUsecase
1820

19-
usecase = ExportDataUsecase()
21+
async def main():
    """Run the registration export for the CLI-supplied event and print the result."""
    exporter = ExportDataUsecase()
    result = await exporter.export_registrations_to_excel(
        event_id=args.event_id,
        file_name=args.file_name,
    )
    print(result)
2025

21-
response = usecase.export_registrations_to_excel(event_id=args.event_id, file_name=args.file_name)
26+
asyncio.run(main())
Lines changed: 137 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1+
import asyncio
12
import os
23
from http import HTTPStatus
34
from io import BytesIO
45
from pathlib import Path
56

7+
import httpx
68
import pandas as pd
7-
import requests
89
from fastapi.responses import JSONResponse
910
from model.pycon_registrations.pycon_registration import PyconExportData
10-
from model.registrations.registration import Registration
11-
from openpyxl import load_workbook
1211
from openpyxl.drawing.image import Image
1312
from PIL import Image as PilImage
1413
from repository.registrations_repository import RegistrationsRepository
@@ -20,105 +19,157 @@ class ExportDataUsecase:
2019
def __init__(self):
    """Create the collaborators and the layout constants used by the export."""
    # Collaborators: the repository is queried for an event's registrations;
    # the PyCon use case is called once per registration before export.
    self.__registrations_repository = RegistrationsRepository()
    self.__pycon_registration_usecase = PyconRegistrationUsecase()

    # Every embedded ID image is resized to this width, in pixels.
    self.__FIXED_IMAGE_WIDTH_PX = 400

    # Multiplied by the pixel width to set the image column's width.
    # NOTE(review): presumably an empirical px -> Excel column-unit factor; confirm.
    self.__EXCEL_COLUMN_WIDTH_FACTOR = 0.15

    # Multiplied by the image height in pixels to set the row height.
    # NOTE(review): 0.75 looks like the px -> points conversion; confirm.
    self.__EXCEL_ROW_HEIGHT_FACTOR = 0.75
25+
26+
async def export_registrations_to_excel(self, event_id: str, file_name: str):
    """
    Export an event's registration list to an Excel file, embedding ID images where available.

    :param event_id: The ID of the event to export registrations for.
    :param file_name: The desired name for the output Excel file.
    :return: JSONResponse with status 200 and a message naming the written file
        (or stating that there was nothing to export), 400 when a ValueError is
        raised, and 500 for any other exception.
    """
    try:
        export_rows = self._fetch_and_prepare_data(event_id)

        # Nothing to write: answer with 200 and do not create a workbook.
        if not export_rows:
            logger.info('No registrations found to export.')
            return JSONResponse(status_code=HTTPStatus.OK, content={'message': 'No registrations to export.'})

        frame, titles = self._create_dataframe(export_rows)
        written_path = await self._write_excel_with_images_async(frame, file_name, titles)

        logger.info(f'Successfully exported data to {written_path}')
        success_message = f'Data exported to {Path(written_path).name}'
        return JSONResponse(status_code=HTTPStatus.OK, content={'message': success_message})

    except ValueError as error:
        # NOTE(review): this catches a ValueError from *any* step above, not only
        # the failed-query error raised by _fetch_and_prepare_data.
        return JSONResponse(status_code=HTTPStatus.BAD_REQUEST, content={'message': str(error)})
    except Exception as error:
        logger.error(f'An unexpected error occurred during Excel export: {error}', exc_info=True)
        failure_message = f'An error occurred during Excel export: {error}'
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={'message': failure_message},
        )
55+
56+
def _fetch_and_prepare_data(self, event_id: str) -> list[PyconExportData]:
    """
    Query the event's registrations and convert them into export records.

    :param event_id: The ID of the event whose registrations are queried.
    :return: One PyconExportData per registration, in query order.
    :raises ValueError: If the repository query does not return HTTPStatus.OK.
    """
    query_status, found, query_message = self.__registrations_repository.query_registrations(event_id=event_id)
    if query_status != HTTPStatus.OK:
        raise ValueError(f'Failed to query registrations: {query_message}')

    # First pass: run every registration through the PyCon use case
    # (presumably this attaches a pre-signed image URL; verify against the use case).
    enriched = []
    for entry in found:
        enriched.append(self.__pycon_registration_usecase.collect_pre_signed_url_pycon(registration=entry))

    # Second pass: project each registration onto the export model.
    records = []
    for entry in enriched:
        records.append(
            PyconExportData(
                firstName=entry.firstName,
                lastName=entry.lastName,
                nickname=entry.nickname,
                jobTitle=entry.jobTitle,
                email=entry.email,
                contactNumber=entry.contactNumber,
                organization=entry.organization,
                ticketType=entry.ticketType,
                # Falls back to None when the registration has no imageIdUrl attribute.
                imageIdUrl=getattr(entry, 'imageIdUrl', None),
            )
        )

    return records
5581

56-
df_to_excel = df.drop('idURL', axis=1)
82+
def _create_dataframe(self, data: list[PyconExportData]) -> tuple[pd.DataFrame, dict]:
    """
    Build the DataFrame of export records and the field-name -> column-title mapping.

    :param data: Export records to place in the frame, one row each.
    :return: (frame, mapping). The frame keeps the model's field names as columns,
        including 'imageIdUrl'; the mapping gives each field's title and omits 'imageIdUrl'.
    """
    column_mapping = {}
    for field in PyconExportData.__fields__.values():
        if field.name == 'imageIdUrl':
            continue
        column_mapping[field.name] = field.field_info.title

    rows = []
    for entry in data:
        row = entry.dict()
        if 'ticketType' in row:
            ticket = row['ticketType']
            # Store the enum member's underlying value rather than the member itself.
            if hasattr(ticket, 'value'):
                row['ticketType'] = ticket.value
        rows.append(row)

    return pd.DataFrame(rows), column_mapping
6298

99+
async def _write_excel_with_images_async(self, df: pd.DataFrame, file_name: str, column_mapping: dict) -> str:
    """
    Write the registrations sheet to an .xlsx file in the current working directory.

    :param df: Source frame; its 'imageIdUrl' column (if present) is not written as text.
    :param file_name: Requested file name; only its final path component is used and
        its suffix is replaced with '.xlsx'.
    :param column_mapping: Field-name -> column-title mapping applied to the sheet header.
    :return: Full path of the written workbook.
    """
    sheet_name = 'Registrations'
    workbook_name = Path(file_name).with_suffix('.xlsx').name
    output_path = os.path.join(os.getcwd(), workbook_name)

    # The sheet gets an empty 'ID Image' column in place of the raw URL column.
    sheet_frame = df.drop(columns=['imageIdUrl'], errors='ignore')
    sheet_frame['ID Image'] = ''
    sheet_frame = sheet_frame.rename(columns=column_mapping)

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        sheet_frame.to_excel(writer, sheet_name=sheet_name, index=False)
        # Images are embedded while the writer is still open, i.e. before the file is saved.
        await self._embed_images_async(writer.sheets[sheet_name], df, sheet_frame.columns)

    return output_path
115113

116-
logger.info(f'Successfully exported data to {output_path}')
117-
return JSONResponse(status_code=HTTPStatus.OK, content={'message': f'Data exported to {output_file_name}'})
114+
async def _download_and_process_image_async(self, client: httpx.AsyncClient, url: str) -> Image | str | None:
    """
    Download one ID image and turn it into an openpyxl image scaled to the fixed width.

    :param client: Shared httpx client used for the GET request.
    :param url: Image URL; empty, blank or non-string values are skipped.
    :return: None when there is no usable URL, an openpyxl Image on success, or an
        'Error: ...' string describing the failure (to be written into the cell).
    """
    if not url or not isinstance(url, str) or not url.strip():
        return None

    try:
        response = await client.get(url, timeout=30)
        response.raise_for_status()

        with PilImage.open(BytesIO(response.content)) as pil_img:
            original_width, original_height = pil_img.size
            # Validate before re-encoding so no work is spent on an unusable image.
            if original_width == 0:
                return 'Error: Invalid image width'

            # Re-encode as PNG so the embedded image has a known format.
            png_stream = BytesIO()
            pil_img.save(png_stream, format='PNG')

        aspect_ratio = original_height / original_width
        # Clamp to at least 1 px: for very wide images int() truncates to 0, and a
        # zero-height image leads to a zero-height (hidden) worksheet row.
        new_height = max(1, int(self.__FIXED_IMAGE_WIDTH_PX * aspect_ratio))

        png_stream.seek(0)

        img = Image(png_stream)
        img.width = self.__FIXED_IMAGE_WIDTH_PX
        img.height = new_height
        return img

    except httpx.HTTPStatusError as e:
        logger.error(f'HTTP error for {url}: {e.response.status_code}')
        return f'Error: {e.response.status_code}'
    except httpx.RequestError as e:
        logger.error(f'Network error for {url}: {e}')
        return 'Error: Network issue'
    except Exception as e:
        # Anything else (e.g. undecodable image data) is reported per-row instead
        # of aborting the whole export.
        logger.error(f'Processing error for {url}: {e}')
        return 'Error: Corrupt image'
150+
151+
async def _embed_images_async(self, worksheet, source_df: pd.DataFrame, final_columns: pd.Index):
    """
    Download every row's ID image concurrently and place it in the 'ID Image' column.

    :param worksheet: openpyxl worksheet that already holds the header and data rows.
    :param source_df: Frame whose 'imageIdUrl' column supplies one URL per data row.
    :param final_columns: Columns of the written sheet; used to locate 'ID Image'.
    """
    # Imported locally so this method does not depend on a new module-level import.
    from openpyxl.utils import get_column_letter

    image_column_idx = final_columns.get_loc('ID Image') + 1
    # get_column_letter handles indexes past 26 ('AA', 'AB', ...), which
    # chr(64 + idx) does not.
    image_column_letter = get_column_letter(image_column_idx)
    worksheet.column_dimensions[image_column_letter].width = (
        self.__FIXED_IMAGE_WIDTH_PX * self.__EXCEL_COLUMN_WIDTH_FACTOR
    )

    async with httpx.AsyncClient() as client:
        # NOTE(review): all downloads are started at once; consider bounding
        # concurrency if events can have very many registrations.
        tasks = [
            self._download_and_process_image_async(client, row.get('imageIdUrl'))
            for _, row in source_df.iterrows()
        ]
        # gather preserves task order, so results line up with the data rows.
        results = await asyncio.gather(*tasks)

    # Row 1 is the header, so the first data row is worksheet row 2.
    for row_idx, result in enumerate(results, start=2):
        if result is None:
            continue

        if isinstance(result, Image):
            worksheet.row_dimensions[row_idx].height = result.height * self.__EXCEL_ROW_HEIGHT_FACTOR
            worksheet.add_image(result, f'{image_column_letter}{row_idx}')
        else:
            # Download/processing failed: record the error text in the image cell.
            worksheet.cell(row=row_idx, column=image_column_idx, value=str(result))

0 commit comments

Comments
 (0)