77 changes: 30 additions & 47 deletions cron/backup.py
@@ -29,7 +29,7 @@ def export_patient_data():
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = backups_dir / f"{timestamp}_patientdata.csv"

# PostgreSQL export query
# PostgreSQL export query - Updated to match new backend schema
query = f"""COPY (SELECT
a.id,
a.vid,
@@ -47,7 +47,14 @@ def export_patient_data():
a.last_menstrual_period AS a_last_menstrual_period,
a.drug_allergies AS a_drug_allergies,
a.sent_to_id AS a_sent_to_id,
pmh.cough AS pmh_cough,
pmh.fever AS pmh_fever,
pmh.blocked_nose AS pmh_blocked_nose,
pmh.sore_throat AS pmh_sore_throat,
pmh.night_sweats AS pmh_night_sweats,
pmh.unintentional_weight_loss AS pmh_unintentional_weight_loss,
pmh.tuberculosis AS pmh_tuberculosis,
pmh.tuberculosis_has_been_treated AS pmh_tuberculosis_has_been_treated,
pmh.diabetes AS pmh_diabetes,
pmh.hypertension AS pmh_hypertension,
pmh.hyperlipidemia AS pmh_hyperlipidemia,
@@ -74,62 +81,39 @@ def export_patient_data():
vs.hr2 AS vs_hr2,
vs.avg_hr AS vs_avg_hr,
vs.rand_blood_glucose_mmolL AS vs_rand_blood_glucose_mmoll,
vs.icope_high_bp AS vs_icope_high_bp,
haw.height AS haw_height,
haw.weight AS haw_weight,
haw.bmi AS haw_bmi,
haw.bmi_analysis AS haw_bmi_analysis,
haw.paeds_height AS haw_paeds_height,
haw.paeds_weight AS haw_paeds_weight,
haw.icope_lost_weight_past_months AS haw_icope_lost_weight_past_months,
haw.icope_no_desire_to_eat AS haw_icope_no_desire_to_eat,
va.l_eye_vision AS va_l_eye_vision,
va.r_eye_vision AS va_r_eye_vision,
va.sent_to_opto AS va_sent_to_opto,
va.referred_for_glasses AS va_referred_for_glasses,
va.icope_eye_problem AS va_icope_eye_problem,
va.icope_treated_for_diabetes_or_bp AS va_icope_treated_for_diabetes_or_bp,
va.additional_intervention AS va_additional_intervention,
d.clean_teeth_freq AS d_clean_teeth_freq,
d.sugar_consume_freq AS d_sugar_consume_freq,
d.past_year_decay AS d_past_year_decay,
d.brush_teeth_pain AS d_brush_teeth_pain,
d.fluoride_exposure AS d_fluoride_exposure,
d.diet AS d_diet,
d.bacterial_exposure AS d_bacterial_exposure,
d.oral_symptoms AS d_oral_symptoms,
d.drink_other_water AS d_drink_other_water,
d.risk_for_dental_carries AS d_risk_for_dental_carries,
d.icope_difficulty_chewing AS d_icope_difficulty_chewing,
d.icope_pain_in_mouth AS d_icope_pain_in_mouth,
d.dental_notes AS d_dental_notes,
d.referral_needed AS d_referral_needed,
d.referral_loc AS d_referral_loc,
d.tooth_11 AS d_tooth_11,
d.tooth_12 AS d_tooth_12,
d.tooth_13 AS d_tooth_13,
d.tooth_14 AS d_tooth_14,
d.tooth_15 AS d_tooth_15,
d.tooth_16 AS d_tooth_16,
d.tooth_17 AS d_tooth_17,
d.tooth_18 AS d_tooth_18,
d.tooth_21 AS d_tooth_21,
d.tooth_22 AS d_tooth_22,
d.tooth_23 AS d_tooth_23,
d.tooth_24 AS d_tooth_24,
d.tooth_25 AS d_tooth_25,
d.tooth_26 AS d_tooth_26,
d.tooth_27 AS d_tooth_27,
d.tooth_28 AS d_tooth_28,
d.tooth_31 AS d_tooth_31,
d.tooth_32 AS d_tooth_32,
d.tooth_33 AS d_tooth_33,
d.tooth_34 AS d_tooth_34,
d.tooth_35 AS d_tooth_35,
d.tooth_36 AS d_tooth_36,
d.tooth_37 AS d_tooth_37,
d.tooth_38 AS d_tooth_38,
d.tooth_41 AS d_tooth_41,
d.tooth_42 AS d_tooth_42,
d.tooth_43 AS d_tooth_43,
d.tooth_44 AS d_tooth_44,
d.tooth_45 AS d_tooth_45,
d.tooth_46 AS d_tooth_46,
d.tooth_47 AS d_tooth_47,
d.tooth_48 AS d_tooth_48,
fr.fall_worries AS fr_fall_worries,
fr.fall_history AS fr_fall_history,
fr.cognitive_status AS fr_cognitive_status,
fr.continence_problems AS fr_continence_problems,
fr.safety_awareness AS fr_safety_awareness,
fr.unsteadiness AS fr_unsteadiness,
fr.side_to_side_balance AS fr_side_to_side_balance,
fr.semi_tandem_balance AS fr_semi_tandem_balance,
fr.tandem_balance AS fr_tandem_balance,
fr.gait_speed_test AS fr_gait_speed_test,
fr.chair_stand_test AS fr_chair_stand_test,
fr.fall_risk_score AS fr_fall_risk_score,
fr.icope_complete_chair_stands AS fr_icope_complete_chair_stands,
fr.icope_chair_stands_time AS fr_icope_chair_stands_time,
dc.well AS dc_well,
dc.msk AS dc_msk,
dc.cvs AS dc_cvs,
@@ -153,8 +137,7 @@ def export_patient_data():
p.trouble_sleep_symptoms AS p_trouble_sleep_symptoms,
p.how_much_fatigue AS p_how_much_fatigue,
p.anxious_low_mood AS p_anxious_low_mood,
p.medication_manage_symptoms AS p_medication_manage_symptoms,
NULL AS a_photo
p.medication_manage_symptoms AS p_medication_manage_symptoms
FROM admin a
LEFT JOIN pastmedicalhistory pmh ON a.id = pmh.id AND a.vid = pmh.vid
LEFT JOIN socialhistory sh ON a.id = sh.id AND a.vid = sh.vid
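The export builds one `COPY (SELECT ...) TO ...` statement and dumps the joined tables to a timestamped CSV. As a rough sketch of how such a query can be driven from Python — assuming psycopg2 as the driver; the DSN and the trimmed-down column list are placeholders, not this repo's configuration:

```python
import datetime
import pathlib

import psycopg2  # assumed driver; the diff does not show how the query is executed

backups_dir = pathlib.Path("backups")
backups_dir.mkdir(exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = backups_dir / f"{timestamp}_patientdata.csv"

# COPY ... TO STDOUT keeps the file write on the client, so the script does not
# need filesystem access on the database server (COPY ... TO '/path' would).
query = """COPY (SELECT a.id, a.vid, pmh.diabetes AS pmh_diabetes
                 FROM admin a
                 LEFT JOIN pastmedicalhistory pmh
                        ON a.id = pmh.id AND a.vid = pmh.vid
           ) TO STDOUT WITH CSV HEADER"""

with psycopg2.connect("dbname=patients user=backup") as conn:  # placeholder DSN
    with conn.cursor() as cur, open(output_path, "w", newline="") as f:
        cur.copy_expert(query, f)
```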
60 changes: 53 additions & 7 deletions importer/importer.py
@@ -6,9 +6,9 @@

import pandas as pd
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, text

from columns import Types
from sqlalchemy import create_engine
from columns import CURRENT_DATABASE

"""
@@ -79,20 +79,66 @@ def processTable(self, df: pd.DataFrame, schema: dict, table_name: str) -> pd.DataFrame:

def writeToDatabase(self, df: pd.DataFrame, table_name: str) -> None:
"""
Write DataFrame to database using SQLAlchemy session.
Write DataFrame to database using UPSERT (INSERT ... ON CONFLICT UPDATE).
Updates existing records if (id, vid) exists, otherwise inserts new records.

:param df: DataFrame to write
:param table_name: Name of the table
"""
if df.empty:
print(f"No data to write for table '{table_name}'.")
return

try:
# Use pandas to_sql method with if_exists='append'
df.to_sql(table_name, self.session.bind, if_exists='append', index=False)
print(f"Data for table '{table_name}' has been written to the database.")
self.session.commit()
# Get column names (SQL names, not CSV names)
columns = df.columns.tolist()

# Build the INSERT ... ON CONFLICT UPDATE statement
# Primary key is always (id, vid) for all tables
placeholders = ', '.join([f':{col}' for col in columns])
column_list = ', '.join(columns)

# Build UPDATE clause: update all columns except id and vid
update_columns = [col for col in columns if col not in ['id', 'vid']]
update_clause = ', '.join([f'{col} = EXCLUDED.{col}' for col in update_columns])

# Construct the UPSERT SQL statement
sql = f"""
INSERT INTO {table_name} ({column_list})
VALUES ({placeholders})
ON CONFLICT (id, vid)
DO UPDATE SET {update_clause}
"""

# Execute for each row in a transaction
connection = self.engine.connect()
trans = connection.begin()
try:
for _, row in df.iterrows():
# Convert row to dict, handling NaN values
row_dict = {}
for col in columns:
value = row[col]
# Convert pandas NaN/NaT to None (SQL NULL)
if pd.isna(value):
row_dict[col] = None
else:
row_dict[col] = value

connection.execute(text(sql), row_dict)

trans.commit()
print(f"Data for table '{table_name}' has been written/updated successfully ({len(df)} rows).")
except Exception as e:
trans.rollback()
raise
finally:
connection.close()

except Exception as e:
self.session.rollback()
print(f"Error writing to table '{table_name}': {e}")
raise

def importToDatabase(self):
"""
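For comparison, the same `(id, vid)` upsert can be written with SQLAlchemy's built-in PostgreSQL `ON CONFLICT` support, which also sends the whole DataFrame as one batched statement rather than one `execute()` per row. A sketch under assumed names — the table, columns, and engine URL are illustrative, not taken from this repo:

```python
import pandas as pd
from sqlalchemy import MetaData, Table, create_engine
from sqlalchemy.dialects.postgresql import insert

engine = create_engine("postgresql:///patients")  # placeholder URL
table = Table("admin", MetaData(), autoload_with=engine)  # reflect existing table

df = pd.DataFrame({"id": [1], "vid": [1], "name": ["Ann"]})
# Same NaN/NaT -> None conversion as above, done column-wise in one pass.
rows = df.astype(object).where(pd.notna(df), None).to_dict("records")

stmt = insert(table).values(rows)
# Update every column except the (id, vid) primary key, mirroring the
# EXCLUDED.<col> clause built by hand in the diff.
update_cols = {c.name: stmt.excluded[c.name]
               for c in table.columns if c.name not in ("id", "vid")}
stmt = stmt.on_conflict_do_update(index_elements=["id", "vid"], set_=update_cols)

with engine.begin() as conn:  # commits on success, rolls back on error
    conn.execute(stmt)
```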
105 changes: 33 additions & 72 deletions initialiser/initialiser.py
@@ -1,4 +1,3 @@
import glob
import os

import sys
@@ -16,8 +15,8 @@
from columns import Types

"""
Initialise an Excel workbook from the patientdata csv file with data validation rules, formatting, and dropdowns.
Takes csv files from patientdata_file_path and types from types_file_path.
Initialise an empty Excel workbook with data validation rules, formatting, and dropdowns.
Generates an empty DataSheet.xlsx with column headers based on types.csv.
Places generated DataSheet.xlsx in the same directory as the script.
"""
class Initialiser:
@@ -49,47 +48,24 @@ def initialise(self):

def readCSV(self):
"""
- Tries to find the latest backup CSV in self.csv_folder_path based on a naming convention.
- If no matching file is found, defaults to reading any CSV file in the directory.
- Reads the CSV file into a pandas DataFrame, renames its columns from CSV name -> DataSheet name,
and initializes the Excel workbook and worksheet.
- Generates an empty DataFrame with column headers based on types.csv
- Creates an empty Excel workbook with the correct column structure
- Initializes the Excel workbook and worksheet
"""
try:
try:
# Try to find the latest file matching the naming convention
file_pattern = os.path.join(self.csv_folder_path, "*_patientdata.csv")
files = glob.glob(file_pattern)

if not files:
raise FileNotFoundError("No files matching the naming convention found.")

# Extract the timestamp from filenames and sort them
files_with_timestamps = [
(file, filename.split("_patientdata.csv")[0])
for file in files
for filename in [os.path.basename(file)]
]
latest_file = max(files_with_timestamps, key=lambda x: x[1])[0]
except Exception:
# Fallback: Read any CSV file in the directory
print(
"No files matching the naming convention. Falling back to any CSV file in the folder."
)
all_files = glob.glob(os.path.join(self.csv_folder_path, "*.csv"))
if not all_files:
raise FileNotFoundError(f"No CSV files found in {self.csv_folder_path}.")
latest_file = max(all_files, key=os.path.getmtime) # Use the most recent file

print(f"CSV file identified: {latest_file}")

# Read the identified CSV file into a DataFrame
self.df = pd.read_csv(latest_file)
print(f"CSV file '{latest_file}' read successfully!")

# Rename columns of self.df from CSV Name to DataSheet Name
self.rename_columns()

# Save DataFrame to an Excel file
# Generate a mapping from csv_name to datasheet_name
csv_to_datasheet_map = {}
for category in self.types.categories:
for field in category.fields:
csv_to_datasheet_map[field.csv_name] = field.datasheet_name

# Create an empty DataFrame with all datasheet column names
datasheet_columns = [field.datasheet_name for category in self.types.categories for field in category.fields]
self.df = pd.DataFrame(columns=datasheet_columns)

print("Empty DataFrame created with column headers based on types.csv")

# Save empty DataFrame to an Excel file
self.df.to_excel(self.wb_name, sheet_name=self.ws_name, index=False)

# Load and store the workbook
@@ -99,9 +75,10 @@ def readCSV(self):
raise ValueError(f"Sheet '{self.ws_name}' does not exist in the workbook.")

self.ws = self.wb[self.ws_name]
print(f"Empty Excel workbook '{self.wb_name}' created successfully!")

except Exception as e:
print(f"Error reading CSV file: {e}")
print(f"Error creating empty Excel file: {e}")

def applyValidationRules(self):
"""
@@ -208,45 +185,29 @@ def applyConditionalFormatting(self):
]

for row_number in range(2, 301): # start and end rows
filled_check = ", ".join([f'ISBLANK(${col}{row_number})' for col in required_columns])
formula = f'=OR({filled_check})'
rule = FormulaRule(formula=[formula], fill=red_fill)

for col in category_columns:
# Mark a cell red only if it is empty
for col in required_columns:
formula = f'=ISBLANK(${col}{row_number})'
rule = FormulaRule(formula=[formula], fill=red_fill)
self.ws.conditional_formatting.add(f"{col}{row_number}", rule)


self.wb.save(self.wb_name)
print("Conditional formatting applied successfully!")

def rename_columns(self):
"""
Renames the columns of the existing DataFrame based on a csv_name to datasheet_name mapping.

Raises:
ValueError: If self.types or self.df is not initialized.
This method is no longer needed since we generate empty sheets directly with datasheet names.
Kept for backward compatibility but does nothing.
"""
# Check if self.types and self.df are initialized
if self.types is None:
raise ValueError(
"The 'types' object is not initialized. Please initialize 'self.types' before renaming columns.")
if self.df is None:
raise ValueError(
"The DataFrame 'self.df' is not initialized. Please load data into 'self.df' before renaming columns.")

# Generate a mapping from csv_name to datasheet_name
csv_to_datasheet_map = {}
for category in self.types.categories:
for field in category.fields:
csv_to_datasheet_map[field.csv_name] = field.datasheet_name

# Rename the columns of the existing DataFrame
self.df.rename(columns=csv_to_datasheet_map, inplace=True)
# No-op: columns are already in datasheet format when creating empty DataFrame
pass

if __name__ == "__main__":
# Path to patient data csv file
patientdata_file_path = "../cron/backups" # Replace with your directory path
# types_file_path is still needed to define the schema
types_file_path = "../types.csv"
datasheet_file_path = "../server/downloads"
# patientdata_file_path is no longer used but kept for backward compatibility
patientdata_file_path = "../cron/backups" # Not used anymore, kept for compatibility

# delete the existing DataSheet.xlsx file
try:
Expand All @@ -259,4 +220,4 @@ def rename_columns(self):
initialiser.readCSV()
initialiser.applyValidationRules()
initialiser.applyFormatting()
initialiser.applyConditionalFormatting()
initialiser.applyConditionalFormatting()
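Taken together, `readCSV()` and `applyConditionalFormatting()` now produce an empty sheet whose headers come from types.csv, with one red-fill `ISBLANK` rule per required cell. A minimal self-contained sketch of that pattern — the column names and required columns are placeholders, and note that openpyxl's `FormulaRule` expects the formula without a leading `=`:

```python
import pandas as pd
from openpyxl import load_workbook
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import PatternFill

columns = ["Patient ID", "Visit ID", "Height"]  # placeholder datasheet names
pd.DataFrame(columns=columns).to_excel("DataSheet.xlsx", sheet_name="Sheet1", index=False)

wb = load_workbook("DataSheet.xlsx")
ws = wb["Sheet1"]
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")

# One ISBLANK rule per required cell: only the empty cell turns red,
# instead of one OR(...) formula flagging the whole row.
for row in range(2, 301):
    for col in ("A", "B"):  # required columns, illustrative
        rule = FormulaRule(formula=[f"ISBLANK(${col}{row})"], fill=red_fill)
        ws.conditional_formatting.add(f"{col}{row}", rule)

wb.save("DataSheet.xlsx")
```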
14 changes: 11 additions & 3 deletions merger/merger.py
@@ -124,6 +124,7 @@ def readDataSheets(self):
def mergeDataSheets(self, output_file="merged_output.csv"):
"""
Merge read dataframes on 'id' and 'vid' columns.
For rows with the same (id, vid), combines non-null values from different sheets.

Args:
output_file (str): Name of the output file to save the merged dataframe.
@@ -139,11 +140,18 @@ def mergeDataSheets(self, output_file="merged_output.csv"):
# Concatenate dataframes
self.df = pd.concat(self.dataframes, ignore_index=True)

# Rename columns based on the types
# Rename columns based on the types (second pass - redundant but kept for compatibility)
self.renameColumns()

# Group by 'id' and 'vid' and aggregate to merge rows
self.df = self.df.groupby(self.merge_cols, as_index=False).first()
# Define aggregation function that takes first non-null value
def first_non_null(series):
"""Return first non-null value, or null if all are null"""
non_null = series.dropna()
return non_null.iloc[0] if len(non_null) > 0 else None

# Group by 'id' and 'vid' and aggregate using first non-null value for each column
agg_dict = {col: first_non_null for col in self.df.columns if col not in self.merge_cols}
self.df = self.df.groupby(self.merge_cols, as_index=False).agg(agg_dict)

# Save the merged dataframe to a new CSV file
self.df.to_csv(output_file, index=False)
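A tiny demonstration of the merge semantics described above, with made-up rows where each sheet fills in a different column for the same `(id, vid)` pair:

```python
import pandas as pd

sheet_a = pd.DataFrame({"id": [1], "vid": [1], "height": [170.0], "weight": [None]})
sheet_b = pd.DataFrame({"id": [1], "vid": [1], "height": [None], "weight": [65.0]})
df = pd.concat([sheet_a, sheet_b], ignore_index=True)

def first_non_null(series):
    """Return the first non-null value, or None if all values are null."""
    non_null = series.dropna()
    return non_null.iloc[0] if len(non_null) > 0 else None

merge_cols = ["id", "vid"]
agg_dict = {col: first_non_null for col in df.columns if col not in merge_cols}
print(df.groupby(merge_cols, as_index=False).agg(agg_dict))
#    id  vid  height  weight
# 0   1    1   170.0    65.0
```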