Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"python"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
13 changes: 6 additions & 7 deletions FEEDBACK.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
# Feedback

1. Your team:
2. Name of each individual participating:
3. How many unit tests were you able to pass?
1. Your team: Whisky Whiskers
2. Name of each individual participating: Leo Witzke
3. How many unit tests were you able to pass? All. I had to change the relative file paths of the input JSON files for my pytest to work. You may have to change them back for yours to work.
4. Document and describe any enhancements included to help the judges properly grade your submission.
- Example One
- Example Two
- Example Three

- Completed test case 12
- Run `cool_visualizations.py` to see two cool visualizations
5. Any feedback for the coding competition? Things you would like to see in future events?
I spent over an hour stuck on test case 12 before I saw the announcement, but it was good besides that. More difficult problems next time, please.

This form can also be emailed to [codingcompetition@statefarm.com](mailto:codingcompetition@statefarm.com). Just make sure that you include a link to your GitHub pull requests.
47 changes: 47 additions & 0 deletions python/cool_visualizations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import simple_data_tool
import helpers
import plotly.express as px
import pandas as pd

# Load the raw disaster and claim records from the competition JSON files.
# NOTE(review): passing None as `self` assumes load_json_from_file never
# touches instance state -- confirm against SimpleDataTool before reusing.
disaster_data = simple_data_tool.SimpleDataTool.load_json_from_file(None, "data/sfcc_2023_disasters.json")
claims_data = simple_data_tool.SimpleDataTool.load_json_from_file(None, "data/sfcc_2023_claims.json")

def map_to_key_index(map):
    """Re-key a dict by insertion index for DataFrame construction.

    Each entry becomes ``index -> [key, *values]``: the original key is the
    first element of the row, followed by the entry's value(s). A list value
    is spread into the row; a scalar value is appended as a single element.
    """
    indexed_rows = {}
    for position, original_key in enumerate(map):
        row = [original_key]
        entry = map[original_key]
        if isinstance(entry, list):
            row += entry
        else:
            row.append(entry)
        indexed_rows[position] = row
    return indexed_rows


# Visualization 1: choropleth of total estimated claim cost per US state.
# First map each disaster id to its state's two-letter postal abbreviation.
disaster_to_state = {}
for disaster in disaster_data:
    disaster_to_state[disaster["id"]] = helpers.state_to_abbreviation[disaster["state"]]
# Accumulate each claim's estimated cost under the state of its disaster.
state_to_claims_cost = {}
for claim in claims_data:
    state = disaster_to_state[claim["disaster_id"]]
    state_to_claims_cost[state] = state_to_claims_cost.get(state,0) + claim["estimate_cost"]
# map_to_key_index turns {state: cost} into index-keyed [state, cost] rows,
# the orient="index" shape DataFrame.from_dict expects here.
disasters_by_state_df = pd.DataFrame.from_dict(map_to_key_index(state_to_claims_cost), orient="index", columns=["state", "total claims cost"])
fig = px.choropleth(disasters_by_state_df,locations="state", locationmode="USA-states", color="total claims cost", scope="usa")
fig.show()


# Visualization 2: bar chart of total claim cost per disaster type,
# colored by the number of loss-of-life claims for that type.
disaster_to_type = {}
for disaster in disaster_data:
    disaster_to_type[disaster["id"]] = disaster["type"]
# Accumulate [total cost, total loss-of-life] per disaster type.
type_to_impact = {}
for claim in claims_data:
    # NOTE(review): `type` shadows the builtin; kept as-is to preserve behavior.
    type = disaster_to_type[claim["disaster_id"]]
    cost = claim["estimate_cost"]
    life_lost = claim["loss_of_life"]
    type_to_impact[type] = type_to_impact.get(type,[0,0])
    type_to_impact[type] = [type_to_impact[type][0] + cost, type_to_impact[type][1] + life_lost]
# Rows come out as [type, total cost, loss-of-life count] via map_to_key_index.
disasters_by_cost_df = pd.DataFrame.from_dict(map_to_key_index(type_to_impact), orient="index", columns=["type of disaster", "total cost", "loss of life claims"])
fig = px.bar(disasters_by_cost_df, x="type of disaster", y="total cost", color="loss of life claims", title="Total Cost of Disasters")
fig.show()
112 changes: 112 additions & 0 deletions python/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import datetime

def counter(json, key, value):
    """Count records in *json* whose *key* field equals *value*.

    Raises KeyError if any record lacks *key*, matching dict indexing.
    """
    return sum(1 for item in json if item[key] == value)

def totaler(json, key, value, key_to_total):
    """Sum the *key_to_total* field over records whose *key* equals *value*.

    Returns 0 when no record matches, matching the original accumulator start.
    """
    return sum(item[key_to_total] for item in json if item[key] == value)

def map_counter(json, key):
    """Build a dict of {field value: occurrence count} for *key* across *json*."""
    counts = {}
    for record in json:
        bucket = record[key]
        counts[bucket] = counts.get(bucket, 0) + 1
    return counts

def map_totaler(json, map_key, value_key):
    """Build a dict of {*map_key* value: running sum of *value_key*} across *json*."""
    totals = {}
    for record in json:
        bucket = record[map_key]
        totals[bucket] = totals.get(bucket, 0) + record[value_key]
    return totals

def max_map_alphabetically(map):
    """Return the key with the largest value.

    Ties are broken by alphabetical order of the keys (strict ``>`` over a
    sorted key list keeps the first alphabetical winner). Raises IndexError
    on an empty dict, matching the original behavior.
    """
    ordered = sorted(map)
    best_key = ordered[0]
    for candidate in ordered[1:]:
        if map[candidate] > map[best_key]:
            best_key = candidate
    return best_key

def min_map_alphabetically(map):
    """Return the key with the smallest value.

    Ties are broken by alphabetical order of the keys (strict ``<`` over a
    sorted key list keeps the first alphabetical winner). Raises IndexError
    on an empty dict, matching the original behavior.
    """
    ordered = sorted(map)
    best_key = ordered[0]
    for candidate in ordered[1:]:
        if map[candidate] < map[best_key]:
            best_key = candidate
    return best_key

def date_to_datetime(date):
    """Parse a ``YYYY-MM-DD`` date string into a datetime.datetime at midnight."""
    return datetime.datetime.strptime(date, "%Y-%m-%d")

# Lookup table from full US state/territory name to its two-letter postal
# abbreviation (the code plotly's locationmode="USA-states" expects).
state_to_abbreviation = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}
5 changes: 4 additions & 1 deletion python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
pytest==7.2.2
pytest==7.2.2
pandas
plotly
# NOTE: `datetime` removed -- it is part of the Python standard library;
# `pip install datetime` would pull an unrelated third-party package.
97 changes: 72 additions & 25 deletions python/simple_data_tool.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import json
import math
import helpers

from statistics import mean



class SimpleDataTool:

AGENTS_FILEPATH = 'data/sfcc_2023_agents.json'
CLAIM_HANDLERS_FILEPATH = 'data/sfcc_2023_claim_handlers.json'
CLAIMS_FILEPATH = 'data/sfcc_2023_claims.json'
DISASTERS_FILEPATH = 'data/sfcc_2023_disasters.json'
AGENTS_FILEPATH = 'python/data/sfcc_2023_agents.json'
CLAIM_HANDLERS_FILEPATH = 'python/data/sfcc_2023_claim_handlers.json'
CLAIMS_FILEPATH = 'python/data/sfcc_2023_claims.json'
DISASTERS_FILEPATH = 'python/data/sfcc_2023_disasters.json'

REGION_MAP = {
'west': 'Alaska,Hawaii,Washington,Oregon,California,Montana,Idaho,Wyoming,Nevada,Utah,Colorado,Arizona,New Mexico',
Expand Down Expand Up @@ -59,7 +58,7 @@ def get_num_closed_claims(self):
Returns:
int: number of closed claims
"""
pass
return helpers.counter(self.get_claim_data(), "status", "Closed")

def get_num_claims_for_claim_handler_id(self, claim_handler_id):
"""Calculates the number of claims assigned to a specific claim handler
Expand All @@ -70,7 +69,7 @@ def get_num_claims_for_claim_handler_id(self, claim_handler_id):
Returns:
int: number of claims assigned to claim handler
"""
pass
return helpers.counter(self.get_claim_data(), "claim_handler_assigned_id", claim_handler_id)

def get_num_disasters_for_state(self, state):
"""Calculates the number of disasters for a specific state
Expand All @@ -82,7 +81,7 @@ def get_num_disasters_for_state(self, state):
Returns:
int: number of disasters for state
"""
pass
return helpers.counter(self.get_disaster_data(), "state", state)

# endregion

Expand All @@ -98,8 +97,7 @@ def get_total_claim_cost_for_disaster(self, disaster_id):
float | None: estimate cost of disaster, rounded to the nearest hundredths place
returns None if no claims are found
"""

pass
return helpers.totaler(self.get_claim_data(), "disaster_id", disaster_id, "estimate_cost") or None

def get_average_claim_cost_for_claim_handler(self, claim_handler_id):
"""Gets the average estimated cost of all claims assigned to a claim handler
Expand All @@ -111,8 +109,11 @@ def get_average_claim_cost_for_claim_handler(self, claim_handler_id):
float | None : average cost of claims, rounded to the nearest hundredths place
or None if no claims are found
"""

pass
total_cost = helpers.totaler(self.get_claim_data(), "claim_handler_assigned_id", claim_handler_id, "estimate_cost")
count = helpers.counter(self.get_claim_data(), "claim_handler_assigned_id", claim_handler_id)
if total_cost == 0 or count == 0:
return None
return round(total_cost/count, 2)

def get_state_with_most_disasters(self):
"""Returns the name of the state with the most disasters based on disaster data
Expand All @@ -127,7 +128,8 @@ def get_state_with_most_disasters(self):
Returns:
string: single name of state
"""
pass
state_to_disaster = helpers.map_counter(self.get_disaster_data(), "state")
return helpers.max_map_alphabetically(state_to_disaster)

def get_state_with_least_disasters(self):
"""Returns the name of the state with the least disasters based on disaster data
Expand All @@ -142,7 +144,8 @@ def get_state_with_least_disasters(self):
Returns:
string: single name of state
"""
pass
state_to_disaster = helpers.map_counter(self.get_disaster_data(), "state")
return helpers.min_map_alphabetically(state_to_disaster)

def get_most_spoken_agent_language_by_state(self, state):
"""Returns the name of the most spoken language by agents (besides English) for a specific state
Expand All @@ -154,7 +157,18 @@ def get_most_spoken_agent_language_by_state(self, state):
string: name of language
or empty string if state doesn't exist
"""
pass
language_map = {}
def increment_langauge(language):
if language != "English":
language_map[language] = language_map.get(language, 0) + 1
for agent in self.get_agent_data():
if agent["state"] == state:
increment_langauge(agent["primary_language"])
increment_langauge(agent["secondary_language"])
if len(language_map) == 0:
return ""
return helpers.max_map_alphabetically(language_map)


def get_num_of_open_claims_for_agent_and_severity(self, agent_id, min_severity_rating):
"""Returns the number of open claims for a specific agent and for a minimum severity level and higher
Expand All @@ -170,8 +184,14 @@ def get_num_of_open_claims_for_agent_and_severity(self, agent_id, min_severity_r
-1 if severity rating out of bounds
None if agent does not exist, or agent has no claims (open or not)
"""

pass
if min_severity_rating < 1 or min_severity_rating > 10:
return -1
count = 0
for claim in self.get_claim_data():
if claim["agent_assigned_id"] == agent_id and claim["status"] != "Closed":
if claim["severity_rating"] >= min_severity_rating:
count += 1
return count or None

# endregion

Expand All @@ -183,8 +203,13 @@ def get_num_disasters_declared_after_end_date(self):
Returns:
int: number of disasters where the declared date is after the end date
"""

pass
def declared_exceeds_end(disaster):
return helpers.date_to_datetime(disaster["declared_date"]) > helpers.date_to_datetime(disaster["end_date"])
count = 0
for disaster in self.get_disaster_data():
if declared_exceeds_end(disaster):
count += 1
return count

def build_map_of_agents_to_total_claim_cost(self):
"""Builds a map of agent and their total claim cost
Expand All @@ -197,8 +222,14 @@ def build_map_of_agents_to_total_claim_cost(self):
Returns:
dict: key is agent id, value is total cost of claims associated to the agent
"""

pass
agent_to_cost = {}
for agent in self.get_agent_data():
agent_to_cost[agent["id"]] = 0
for claim in self.get_claim_data():
agent_to_cost[claim["agent_assigned_id"]] += claim["estimate_cost"]
for id in list(agent_to_cost.keys()):
agent_to_cost[id] = round(agent_to_cost[id],2)
return agent_to_cost

def calculate_disaster_claim_density(self, disaster_id):
"""Calculates density of a diaster based on the number of claims and impact radius
Expand All @@ -214,7 +245,13 @@ def calculate_disaster_claim_density(self, disaster_id):
float: density of claims to disaster area, rounded to the nearest thousandths place
None if disaster does not exist
"""
pass
disaster_area = None
for disaster in self.get_disaster_data():
if disaster["id"] == disaster_id:
disaster_area = math.pi*(disaster["radius_miles"]**2)
if disaster_area == None:
return None
return round(helpers.counter(self.get_claim_data(), "disaster_id", disaster_id)/disaster_area, 5)

# endregion

Expand All @@ -234,7 +271,17 @@ def get_top_three_months_with_highest_num_of_claims_desc(self):
Returns:
list: three strings of month and year, descending order of highest claims
"""

pass
def format_date(date):
return helpers.date_to_datetime(date).strftime("%B %Y")
disaster_to_month = {}
for disaster in self.get_disaster_data():
disaster_to_month[disaster["id"]] = format_date(disaster["declared_date"])
month_to_claims = {}
for claim in self.get_claim_data():
month = disaster_to_month[claim["disaster_id"]]
month_to_claims[month] = month_to_claims.get(month, 0) + 1
months = list(month_to_claims.keys())
months.sort(key=(lambda month : month_to_claims[month]), reverse=True)
return months[:3]

# endregion