77 changes: 30 additions & 47 deletions cron/backup.py
@@ -29,7 +29,7 @@ def export_patient_data():
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = backups_dir / f"{timestamp}_patientdata.csv"

# PostgreSQL export query
# PostgreSQL export query - Updated to match new backend schema
query = f"""COPY (SELECT
a.id,
a.vid,
@@ -47,7 +47,14 @@ def export_patient_data():
a.last_menstrual_period AS a_last_menstrual_period,
a.drug_allergies AS a_drug_allergies,
a.sent_to_id AS a_sent_to_id,
pmh.cough AS pmh_cough,
pmh.fever AS pmh_fever,
pmh.blocked_nose AS pmh_blocked_nose,
pmh.sore_throat AS pmh_sore_throat,
pmh.night_sweats AS pmh_night_sweats,
pmh.unintentional_weight_loss AS pmh_unintentional_weight_loss,
pmh.tuberculosis AS pmh_tuberculosis,
pmh.tuberculosis_has_been_treated AS pmh_tuberculosis_has_been_treated,
pmh.diabetes AS pmh_diabetes,
pmh.hypertension AS pmh_hypertension,
pmh.hyperlipidemia AS pmh_hyperlipidemia,
@@ -74,62 +81,39 @@ def export_patient_data():
vs.hr2 AS vs_hr2,
vs.avg_hr AS vs_avg_hr,
vs.rand_blood_glucose_mmolL AS vs_rand_blood_glucose_mmoll,
vs.icope_high_bp AS vs_icope_high_bp,
haw.height AS haw_height,
haw.weight AS haw_weight,
haw.bmi AS haw_bmi,
haw.bmi_analysis AS haw_bmi_analysis,
haw.paeds_height AS haw_paeds_height,
haw.paeds_weight AS haw_paeds_weight,
haw.icope_lost_weight_past_months AS haw_icope_lost_weight_past_months,
haw.icope_no_desire_to_eat AS haw_icope_no_desire_to_eat,
va.l_eye_vision AS va_l_eye_vision,
va.r_eye_vision AS va_r_eye_vision,
va.sent_to_opto AS va_sent_to_opto,
va.referred_for_glasses AS va_referred_for_glasses,
va.icope_eye_problem AS va_icope_eye_problem,
va.icope_treated_for_diabetes_or_bp AS va_icope_treated_for_diabetes_or_bp,
va.additional_intervention AS va_additional_intervention,
d.clean_teeth_freq AS d_clean_teeth_freq,
d.sugar_consume_freq AS d_sugar_consume_freq,
d.past_year_decay AS d_past_year_decay,
d.brush_teeth_pain AS d_brush_teeth_pain,
d.fluoride_exposure AS d_fluoride_exposure,
d.diet AS d_diet,
d.bacterial_exposure AS d_bacterial_exposure,
d.oral_symptoms AS d_oral_symptoms,
d.drink_other_water AS d_drink_other_water,
d.risk_for_dental_carries AS d_risk_for_dental_carries,
d.icope_difficulty_chewing AS d_icope_difficulty_chewing,
d.icope_pain_in_mouth AS d_icope_pain_in_mouth,
d.dental_notes AS d_dental_notes,
d.referral_needed AS d_referral_needed,
d.referral_loc AS d_referral_loc,
d.tooth_11 AS d_tooth_11,
d.tooth_12 AS d_tooth_12,
d.tooth_13 AS d_tooth_13,
d.tooth_14 AS d_tooth_14,
d.tooth_15 AS d_tooth_15,
d.tooth_16 AS d_tooth_16,
d.tooth_17 AS d_tooth_17,
d.tooth_18 AS d_tooth_18,
d.tooth_21 AS d_tooth_21,
d.tooth_22 AS d_tooth_22,
d.tooth_23 AS d_tooth_23,
d.tooth_24 AS d_tooth_24,
d.tooth_25 AS d_tooth_25,
d.tooth_26 AS d_tooth_26,
d.tooth_27 AS d_tooth_27,
d.tooth_28 AS d_tooth_28,
d.tooth_31 AS d_tooth_31,
d.tooth_32 AS d_tooth_32,
d.tooth_33 AS d_tooth_33,
d.tooth_34 AS d_tooth_34,
d.tooth_35 AS d_tooth_35,
d.tooth_36 AS d_tooth_36,
d.tooth_37 AS d_tooth_37,
d.tooth_38 AS d_tooth_38,
d.tooth_41 AS d_tooth_41,
d.tooth_42 AS d_tooth_42,
d.tooth_43 AS d_tooth_43,
d.tooth_44 AS d_tooth_44,
d.tooth_45 AS d_tooth_45,
d.tooth_46 AS d_tooth_46,
d.tooth_47 AS d_tooth_47,
d.tooth_48 AS d_tooth_48,
fr.fall_worries AS fr_fall_worries,
fr.fall_history AS fr_fall_history,
fr.cognitive_status AS fr_cognitive_status,
fr.continence_problems AS fr_continence_problems,
fr.safety_awareness AS fr_safety_awareness,
fr.unsteadiness AS fr_unsteadiness,
fr.side_to_side_balance AS fr_side_to_side_balance,
fr.semi_tandem_balance AS fr_semi_tandem_balance,
fr.tandem_balance AS fr_tandem_balance,
fr.gait_speed_test AS fr_gait_speed_test,
fr.chair_stand_test AS fr_chair_stand_test,
fr.fall_risk_score AS fr_fall_risk_score,
fr.icope_complete_chair_stands AS fr_icope_complete_chair_stands,
fr.icope_chair_stands_time AS fr_icope_chair_stands_time,
dc.well AS dc_well,
dc.msk AS dc_msk,
dc.cvs AS dc_cvs,
@@ -153,8 +137,7 @@ def export_patient_data():
p.trouble_sleep_symptoms AS p_trouble_sleep_symptoms,
p.how_much_fatigue AS p_how_much_fatigue,
p.anxious_low_mood AS p_anxious_low_mood,
p.medication_manage_symptoms AS p_medication_manage_symptoms,
NULL AS a_photo
p.medication_manage_symptoms AS p_medication_manage_symptoms
FROM admin a
LEFT JOIN pastmedicalhistory pmh ON a.id = pmh.id AND a.vid = pmh.vid
LEFT JOIN socialhistory sh ON a.id = sh.id AND a.vid = sh.vid
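The export builds one `COPY (SELECT ...) TO ...` statement and dumps the joined tables to a timestamped CSV. As a rough sketch of how such a query can be driven from Python — assuming psycopg2 as the driver; the DSN and the trimmed-down column list are placeholders, not this repo's configuration:

```python
import datetime
import pathlib

import psycopg2  # assumed driver; the diff does not show how the query is executed

backups_dir = pathlib.Path("backups")
backups_dir.mkdir(exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = backups_dir / f"{timestamp}_patientdata.csv"

# COPY ... TO STDOUT keeps the file write on the client, so the script does not
# need filesystem access on the database server (COPY ... TO '/path' would).
query = """COPY (SELECT a.id, a.vid, pmh.diabetes AS pmh_diabetes
                 FROM admin a
                 LEFT JOIN pastmedicalhistory pmh
                        ON a.id = pmh.id AND a.vid = pmh.vid
           ) TO STDOUT WITH CSV HEADER"""

with psycopg2.connect("dbname=patients user=backup") as conn:  # placeholder DSN
    with conn.cursor() as cur, open(output_path, "w", newline="") as f:
        cur.copy_expert(query, f)
```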
60 changes: 53 additions & 7 deletions importer/importer.py
@@ -6,9 +6,9 @@

import pandas as pd
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, text

from columns import Types
from sqlalchemy import create_engine
from columns import CURRENT_DATABASE

"""
@@ -79,20 +79,66 @@ def processTable(self, df: pd.DataFrame, schema: dict, table_name: str) -> pd.DataFrame:

def writeToDatabase(self, df: pd.DataFrame, table_name: str) -> None:
"""
Write DataFrame to database using SQLAlchemy session.
Write DataFrame to database using UPSERT (INSERT ... ON CONFLICT UPDATE).
Updates existing records if (id, vid) exists, otherwise inserts new records.

:param df: DataFrame to write
:param table_name: Name of the table
"""
if df.empty:
print(f"No data to write for table '{table_name}'.")
return

try:
# Use pandas to_sql method with if_exists='append'
df.to_sql(table_name, self.session.bind, if_exists='append', index=False)
print(f"Data for table '{table_name}' has been written to the database.")
self.session.commit()
# Get column names (SQL names, not CSV names)
columns = df.columns.tolist()

# Build the INSERT ... ON CONFLICT UPDATE statement
# Primary key is always (id, vid) for all tables
placeholders = ', '.join([f':{col}' for col in columns])
column_list = ', '.join(columns)

# Build UPDATE clause: update all columns except id and vid
update_columns = [col for col in columns if col not in ['id', 'vid']]
update_clause = ', '.join([f'{col} = EXCLUDED.{col}' for col in update_columns])

# Construct the UPSERT SQL statement
sql = f"""
INSERT INTO {table_name} ({column_list})
VALUES ({placeholders})
ON CONFLICT (id, vid)
DO UPDATE SET {update_clause}
"""

# Execute for each row in a transaction
connection = self.engine.connect()
trans = connection.begin()
try:
for _, row in df.iterrows():
# Convert row to dict, handling NaN values
row_dict = {}
for col in columns:
value = row[col]
# Convert pandas NaN/NaT to None (SQL NULL)
if pd.isna(value):
row_dict[col] = None
else:
row_dict[col] = value

connection.execute(text(sql), row_dict)

trans.commit()
print(f"Data for table '{table_name}' has been written/updated successfully ({len(df)} rows).")
except Exception as e:
trans.rollback()
raise
finally:
connection.close()

except Exception as e:
self.session.rollback()
print(f"Error writing to table '{table_name}': {e}")
raise

def importToDatabase(self):
"""
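For comparison, the same `(id, vid)` upsert can be written with SQLAlchemy's built-in PostgreSQL `ON CONFLICT` support, which also sends the whole DataFrame as one batched statement rather than one `execute()` per row. A sketch under assumed names — the table, columns, and engine URL are illustrative, not taken from this repo:

```python
import pandas as pd
from sqlalchemy import MetaData, Table, create_engine
from sqlalchemy.dialects.postgresql import insert

engine = create_engine("postgresql:///patients")  # placeholder URL
table = Table("admin", MetaData(), autoload_with=engine)  # reflect existing table

df = pd.DataFrame({"id": [1], "vid": [1], "name": ["Ann"]})
# Same NaN/NaT -> None conversion as above, done column-wise in one pass.
rows = df.astype(object).where(pd.notna(df), None).to_dict("records")

stmt = insert(table).values(rows)
# Update every column except the (id, vid) primary key, mirroring the
# EXCLUDED.<col> clause built by hand in the diff.
update_cols = {c.name: stmt.excluded[c.name]
               for c in table.columns if c.name not in ("id", "vid")}
stmt = stmt.on_conflict_do_update(index_elements=["id", "vid"], set_=update_cols)

with engine.begin() as conn:  # commits on success, rolls back on error
    conn.execute(stmt)
```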
105 changes: 33 additions & 72 deletions initialiser/initialiser.py
@@ -1,4 +1,3 @@
import glob
import os

import sys
@@ -16,8 +15,8 @@
from columns import Types

"""
Initialise an Excel workbook from the patientdata csv file with data validation rules, formatting, and dropdowns.
Takes csv files from patientdata_file_path and types from types_file_path.
Initialise an empty Excel workbook with data validation rules, formatting, and dropdowns.
Generates an empty DataSheet.xlsx with column headers based on types.csv.
Places generated DataSheet.xlsx in the same directory as the script.
"""
class Initialiser:
@@ -49,47 +48,24 @@ def initialise(self):

def readCSV(self):
"""
- Tries to find the latest backup CSV in self.csv_folder_path based on a naming convention.
- If no matching file is found, defaults to reading any CSV file in the directory.
- Reads the CSV file into a pandas DataFrame, renames its columns from CSV name -> DataSheet name,
and initializes the Excel workbook and worksheet.
- Generates an empty DataFrame with column headers based on types.csv
- Creates an empty Excel workbook with the correct column structure
- Initializes the Excel workbook and worksheet
"""
try:
try:
# Try to find the latest file matching the naming convention
file_pattern = os.path.join(self.csv_folder_path, "*_patientdata.csv")
files = glob.glob(file_pattern)

if not files:
raise FileNotFoundError("No files matching the naming convention found.")

# Extract the timestamp from filenames and sort them
files_with_timestamps = [
(file, filename.split("_patientdata.csv")[0])
for file in files
for filename in [os.path.basename(file)]
]
latest_file = max(files_with_timestamps, key=lambda x: x[1])[0]
except Exception:
# Fallback: Read any CSV file in the directory
print(
"No files matching the naming convention. Falling back to any CSV file in the folder."
)
all_files = glob.glob(os.path.join(self.csv_folder_path, "*.csv"))
if not all_files:
raise FileNotFoundError(f"No CSV files found in {self.csv_folder_path}.")
latest_file = max(all_files, key=os.path.getmtime) # Use the most recent file

print(f"CSV file identified: {latest_file}")

# Read the identified CSV file into a DataFrame
self.df = pd.read_csv(latest_file)
print(f"CSV file '{latest_file}' read successfully!")

# Rename columns of self.df from CSV Name to DataSheet Name
self.rename_columns()

# Save DataFrame to an Excel file
# Generate a mapping from csv_name to datasheet_name
csv_to_datasheet_map = {}
for category in self.types.categories:
for field in category.fields:
csv_to_datasheet_map[field.csv_name] = field.datasheet_name

# Create an empty DataFrame with all datasheet column names
datasheet_columns = [field.datasheet_name for category in self.types.categories for field in category.fields]
self.df = pd.DataFrame(columns=datasheet_columns)

print("Empty DataFrame created with column headers based on types.csv")

# Save empty DataFrame to an Excel file
self.df.to_excel(self.wb_name, sheet_name=self.ws_name, index=False)

# Load and store the workbook
@@ -99,9 +75,10 @@ def readCSV(self):
raise ValueError(f"Sheet '{self.ws_name}' does not exist in the workbook.")

self.ws = self.wb[self.ws_name]
print(f"Empty Excel workbook '{self.wb_name}' created successfully!")

except Exception as e:
print(f"Error reading CSV file: {e}")
print(f"Error creating empty Excel file: {e}")

def applyValidationRules(self):
"""
@@ -208,45 +185,29 @@ def applyConditionalFormatting(self):
]

for row_number in range(2, 301): # start and end rows
filled_check = ", ".join([f'ISBLANK(${col}{row_number})' for col in required_columns])
formula = f'=OR({filled_check})'
rule = FormulaRule(formula=[formula], fill=red_fill)

for col in category_columns:
# Mark a cell red only if it is empty
for col in required_columns:
formula = f'=ISBLANK(${col}{row_number})'
rule = FormulaRule(formula=[formula], fill=red_fill)
self.ws.conditional_formatting.add(f"{col}{row_number}", rule)


self.wb.save(self.wb_name)
print("Conditional formatting applied successfully!")

def rename_columns(self):
"""
Renames the columns of the existing DataFrame based on a csv_name to datasheet_name mapping.

Raises:
ValueError: If self.types or self.df is not initialized.
This method is no longer needed since we generate empty sheets directly with datasheet names.
Kept for backward compatibility but does nothing.
"""
# Check if self.types and self.df are initialized
if self.types is None:
raise ValueError(
"The 'types' object is not initialized. Please initialize 'self.types' before renaming columns.")
if self.df is None:
raise ValueError(
"The DataFrame 'self.df' is not initialized. Please load data into 'self.df' before renaming columns.")

# Generate a mapping from csv_name to datasheet_name
csv_to_datasheet_map = {}
for category in self.types.categories:
for field in category.fields:
csv_to_datasheet_map[field.csv_name] = field.datasheet_name

# Rename the columns of the existing DataFrame
self.df.rename(columns=csv_to_datasheet_map, inplace=True)
# No-op: columns are already in datasheet format when creating empty DataFrame
pass

if __name__ == "__main__":
# Path to patient data csv file
patientdata_file_path = "../cron/backups" # Replace with your directory path
# types_file_path is still needed to define the schema
types_file_path = "../types.csv"
datasheet_file_path = "../server/downloads"
# patientdata_file_path is no longer used but kept for backward compatibility
patientdata_file_path = "../cron/backups" # Not used anymore, kept for compatibility

# delete the existing DataSheet.xlsx file
try:
Expand All @@ -259,4 +220,4 @@ def rename_columns(self):
initialiser.readCSV()
initialiser.applyValidationRules()
initialiser.applyFormatting()
initialiser.applyConditionalFormatting()
initialiser.applyConditionalFormatting()
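Taken together, `readCSV()` and `applyConditionalFormatting()` now produce an empty sheet whose headers come from types.csv, with one red-fill `ISBLANK` rule per required cell. A minimal self-contained sketch of that pattern — the column names and required columns are placeholders, and note that openpyxl's `FormulaRule` expects the formula without a leading `=`:

```python
import pandas as pd
from openpyxl import load_workbook
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import PatternFill

columns = ["Patient ID", "Visit ID", "Height"]  # placeholder datasheet names
pd.DataFrame(columns=columns).to_excel("DataSheet.xlsx", sheet_name="Sheet1", index=False)

wb = load_workbook("DataSheet.xlsx")
ws = wb["Sheet1"]
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")

# One ISBLANK rule per required cell: only the empty cell turns red,
# instead of one OR(...) formula flagging the whole row.
for row in range(2, 301):
    for col in ("A", "B"):  # required columns, illustrative
        rule = FormulaRule(formula=[f"ISBLANK(${col}{row})"], fill=red_fill)
        ws.conditional_formatting.add(f"{col}{row}", rule)

wb.save("DataSheet.xlsx")
```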
14 changes: 11 additions & 3 deletions merger/merger.py
@@ -124,6 +124,7 @@ def readDataSheets(self):
def mergeDataSheets(self, output_file="merged_output.csv"):
"""
Merge read dataframes on 'id' and 'vid' columns.
For rows with the same (id, vid), combines non-null values from different sheets.

Args:
output_file (str): Name of the output file to save the merged dataframe.
@@ -139,11 +140,18 @@ def mergeDataSheets(self, output_file="merged_output.csv"):
# Concatenate dataframes
self.df = pd.concat(self.dataframes, ignore_index=True)

# Rename columns based on the types
# Rename columns based on the types (second pass - redundant but kept for compatibility)
self.renameColumns()

# Group by 'id' and 'vid' and aggregate to merge rows
self.df = self.df.groupby(self.merge_cols, as_index=False).first()
# Define aggregation function that takes first non-null value
def first_non_null(series):
"""Return first non-null value, or null if all are null"""
non_null = series.dropna()
return non_null.iloc[0] if len(non_null) > 0 else None

# Group by 'id' and 'vid' and aggregate using first non-null value for each column
agg_dict = {col: first_non_null for col in self.df.columns if col not in self.merge_cols}
self.df = self.df.groupby(self.merge_cols, as_index=False).agg(agg_dict)

# Save the merged dataframe to a new CSV file
self.df.to_csv(output_file, index=False)
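A tiny demonstration of the merge semantics described above, with made-up rows where each sheet fills in a different column for the same `(id, vid)` pair:

```python
import pandas as pd

sheet_a = pd.DataFrame({"id": [1], "vid": [1], "height": [170.0], "weight": [None]})
sheet_b = pd.DataFrame({"id": [1], "vid": [1], "height": [None], "weight": [65.0]})
df = pd.concat([sheet_a, sheet_b], ignore_index=True)

def first_non_null(series):
    """Return the first non-null value, or None if all values are null."""
    non_null = series.dropna()
    return non_null.iloc[0] if len(non_null) > 0 else None

merge_cols = ["id", "vid"]
agg_dict = {col: first_non_null for col in df.columns if col not in merge_cols}
print(df.groupby(merge_cols, as_index=False).agg(agg_dict))
#    id  vid  height  weight
# 0   1    1   170.0    65.0
```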