-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmodel.py
More file actions
203 lines (163 loc) · 8.33 KB
/
model.py
File metadata and controls
203 lines (163 loc) · 8.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import requests
import io
# Function to call the UV Index API
def get_uv_index_data(city, state, timeout=10):
    """Download the hourly UV Index CSV for a city from EPA Envirofacts.

    Args:
        city: City name; it is upper-cased before being placed in the URL.
        state: State code; it is upper-cased before being placed in the URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text when the server answers with HTTP 200.
        ``None`` for any other status code, and also when the request
        itself fails (connection error, timeout, ...).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Escape both path segments so a name containing spaces or slashes
    # cannot change the structure of the URL.
    city_segment = quote(city.upper(), safe='')
    state_segment = quote(state.upper(), safe='')
    url = (
        "https://data.epa.gov/efservice/getEnvirofactsUVHOURLY"
        f"/CITY/{city_segment}/STATE/{state_segment}/CSV"
    )

    try:
        # Without a timeout a stalled server would block the script forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Callers already treat None as "data unavailable".
        return None

    if response.status_code == 200:
        return response.text
    return None
# Function to process UV Index data
def process_uv_index_data(city, state):
    """Fetch hourly UV readings for a city and reduce them to daily means.

    The raw CSV returned by ``get_uv_index_data`` is saved to
    ``<city>_uv_index.csv`` in the working directory (``phoenix_uv_index.csv``
    for ``city='phoenix'``) before any processing takes place.

    Args:
        city: City name, forwarded to ``get_uv_index_data``. It is also used
            to build the output file name and the ``Region`` label.
        state: State code, forwarded to ``get_uv_index_data``.

    Returns:
        A DataFrame with one row per calendar date and the columns ``DATE``,
        ``UV_Index`` (mean of ``UV VALUE`` for that date) and ``Region``
        (title-cased city name, e.g. ``'Phoenix'``). ``None`` when no data
        could be retrieved or the expected columns are missing; a message is
        printed in that case.
    """
    uv_data = get_uv_index_data(city, state)
    if not uv_data:
        print("Failed to retrieve UV Index data.")
        return None

    uv_df = pd.read_csv(io.StringIO(uv_data))

    # Derive the file name and label from the requested city so that a call
    # for another city neither overwrites the Phoenix file nor mislabels rows.
    city_words = city.split()
    raw_csv_name = f"{'_'.join(word.lower() for word in city_words)}_uv_index.csv"
    region_label = ' '.join(city_words).title()

    # Keep an untouched copy of what the service returned.
    uv_df.to_csv(raw_csv_name, index=False)

    # A 200 response that does not have the expected layout is handled like
    # any other retrieval failure instead of surfacing as a KeyError.
    if not {'DATE TIME', 'UV VALUE'}.issubset(uv_df.columns):
        print("Failed to retrieve UV Index data.")
        return None

    uv_df['DATE TIME'] = pd.to_datetime(uv_df['DATE TIME'])
    uv_df['DATE'] = uv_df['DATE TIME'].dt.date

    daily_uv_index = uv_df.groupby('DATE')['UV VALUE'].mean().reset_index()
    daily_uv_index.rename(columns={'UV VALUE': 'UV_Index'}, inplace=True)
    daily_uv_index['Region'] = region_label
    return daily_uv_index
# Function to process Tree Canopy data
def process_tree_canopy_data(file_path):
    """Summarise a tree inventory CSV into per-city average tree size.

    Args:
        file_path: Path (or file-like object) handed to ``pandas.read_csv``.
            The data must contain the columns ``city``, ``height_M`` and
            ``diameter_breast_height_CM``.

    Returns:
        A DataFrame with one row per ``city`` and the columns ``city``,
        ``Avg_Tree_Height``, ``Avg_Tree_Diameter`` and ``Region`` (always
        ``'Phoenix'``).
    """
    trees = pd.read_csv(file_path)

    # Named aggregation produces the final column names directly.
    per_city = (
        trees.groupby('city')
        .agg(
            Avg_Tree_Height=('height_M', 'mean'),
            Avg_Tree_Diameter=('diameter_breast_height_CM', 'mean'),
        )
        .reset_index()
    )

    # Constant key used later to join this table with the other summaries.
    return per_city.assign(Region='Phoenix')
# Function to process Walkable Areas data
def process_walkable_areas_data(file_path):
    """Load the walkable-areas CSV and tag every row.

    Args:
        file_path: Path (or file-like object) handed to ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with two columns set on every row:
        ``Region`` = ``'Phoenix'`` and ``Walkable_Areas`` = ``1``.
    """
    # Every record in this file is flagged as walkable and given the
    # constant join key.
    return pd.read_csv(file_path).assign(Region='Phoenix', Walkable_Areas=1)
# Function to process Zoning Laws data
def process_zoning_laws_data(file_path):
    """Load the zoning CSV and keep only the identifier and zoning code.

    Args:
        file_path: Path (or file-like object) handed to ``pandas.read_csv``.
            The data must contain the columns ``OBJECTID`` and ``ZONING``.

    Returns:
        A new DataFrame with the columns ``OBJECTID``, ``ZONING`` and
        ``Region`` (always ``'Phoenix'``).
    """
    raw_zoning = pd.read_csv(file_path)
    kept_columns = ['OBJECTID', 'ZONING']
    # Selecting with a list yields an independent frame; the constant
    # Region column is then appended to it.
    return raw_zoning.loc[:, kept_columns].assign(Region='Phoenix')
# Function to process Average Precipitation data
def process_avg_precipitation_data(file_path):
    """Summarise the precipitation CSV into a mean annual value per legend.

    Args:
        file_path: Path (or file-like object) handed to ``pandas.read_csv``.
            The data must contain the columns ``LEGEND`` and ``ANNUAL_``.

    Returns:
        A DataFrame with one row per ``LEGEND`` value and the columns
        ``LEGEND``, ``Avg_Precipitation`` (mean of ``ANNUAL_``) and
        ``Region`` (always ``'Phoenix'``).
    """
    rainfall = pd.read_csv(file_path)

    # Named aggregation produces the final column name directly.
    by_legend = (
        rainfall.groupby('LEGEND')
        .agg(Avg_Precipitation=('ANNUAL_', 'mean'))
        .reset_index()
    )

    # Constant key used later to join this table with the other summaries.
    return by_legend.assign(Region='Phoenix')
# Function to determine heat mitigation infrastructure value
def determine_heat_mitigation_infra(row):
    """Classify one record as ``'High'``, ``'Medium'`` or ``'Low'``.

    Rules, checked in this order:
        1. ``UV_Index`` above 7 -> ``'High'``.
        2. ``Avg_Tree_Height`` above 5 and ``Avg_Tree_Diameter`` above 10
           -> ``'Medium'``.
        3. ``Walkable_Areas`` equal to 1 -> ``'Medium'``.
        4. Anything else -> ``'Low'``.

    Args:
        row: Mapping-like record (for example a DataFrame row) that is
            indexed by the column names above.

    Returns:
        The label as a string.
    """
    # The UV rule wins outright; the other fields are not read in that case.
    if row['UV_Index'] > 7:
        return 'High'

    large_trees = row['Avg_Tree_Height'] > 5 and row['Avg_Tree_Diameter'] > 10
    if large_trees or row['Walkable_Areas'] == 1:
        return 'Medium'

    return 'Low'
# Fetch and process data
# UV readings come from the remote service; the result is None when the
# download fails, which the code further down checks for.
daily_uv_index = process_uv_index_data(city='phoenix', state='az')

# The remaining inputs are CSV files read from the working directory.
tree_canopy_summary = process_tree_canopy_data(
    file_path='phx_treeLocations.csv')
walkable_areas_summary = process_walkable_areas_data(
    file_path='Walkable_Urban_Code.csv')
zoning_laws_summary = process_zoning_laws_data(
    file_path='Zoning_Index.csv')
avg_precipitation_summary = process_avg_precipitation_data(
    file_path='precipitation.csv')
# Merge data
if daily_uv_index is not None:
    # Join all summaries on the shared Region key.
    # NOTE(review): each input table carries the same constant Region value,
    # so these inner joins pair every row of one table with every row of the
    # next. The merged row count is the product of the table sizes. Confirm
    # that this is intended and that it fits in memory.
    merged_data = (
        daily_uv_index
        .merge(zoning_laws_summary, on='Region')
        .merge(tree_canopy_summary, on='Region')
        .merge(walkable_areas_summary, on='Region')
        .merge(avg_precipitation_summary, on='Region')
    )

    # Fill missing values in merged data
    merged_data.fillna(0, inplace=True)

    # Derive the target label for every row with the rule-based function.
    # NOTE(review): the label is computed from columns that are also used as
    # features below, so the accuracy figures measure how well the tree
    # reproduces those rules. Confirm this is the intended evaluation.
    merged_data['Heat_Mitigation_Infra'] = merged_data.apply(
        determine_heat_mitigation_infra, axis=1)

    # Select features and target
    features = ['UV_Index', 'ZONING', 'Avg_Tree_Height',
                'Avg_Tree_Diameter', 'Walkable_Areas', 'Avg_Precipitation']
    X = merged_data[features]
    y = merged_data['Heat_Mitigation_Infra']

    # One-hot encode the non-numeric columns. drop_first=True removes the
    # first dummy of each encoded column, so that level has no column in X.
    X = pd.get_dummies(X, drop_first=True)

    # Hold out 20% of the rows for the final evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Seed the tree as well as the split; otherwise the scores and the saved
    # model can differ from run to run on identical data.
    model = DecisionTreeClassifier(random_state=42)
    cross_val_scores = cross_val_score(model, X_train, y_train, cv=5)
    print(f'Cross-Validation Scores: {cross_val_scores}')
    print(f'Average Cross-Validation Score: {cross_val_scores.mean()}')

    # Fit the model and evaluate
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy}')
    print('Classification Report:')
    print(classification_report(y_test, y_pred))

    # Save model
    joblib.dump(model, 'decision_tree_model.pkl')

    # Single source for the per-zoning advice texts. Insertion order is the
    # order in which the zoning types are checked.
    zoning_recommendations = {
        'Residential': 'Solar panel installation, more trees, and green areas',
        'Commercial': 'Cool roofs, green walls, and urban trees',
        'Industrial': 'Reflective materials, water bodies, and green buffers',
    }

    def predict_infrastructure(data, model, columns):
        """Predict a label per record, overridden by zoning-specific advice.

        Args:
            data: Records (list of dicts) holding the raw feature values.
            model: Fitted classifier exposing ``predict``.
            columns: Column layout the model was trained on; the encoded
                records are re-indexed to it and missing columns become 0.

        Returns:
            A list with one entry per record: ``'<zoning>: <advice>'`` when
            the record's zoning dummy column exists in ``columns`` and is
            set, otherwise the model's predicted label.
        """
        new_data = pd.DataFrame(data)
        new_data = pd.get_dummies(new_data)
        new_data = new_data.reindex(columns=columns, fill_value=0)

        # Perform predictions
        prediction = model.predict(new_data)

        # Override predictions based on conditions
        recommendations = []
        for position, predicted_label in enumerate(prediction):
            for zone, advice in zoning_recommendations.items():
                dummy_column = f'ZONING_{zone}'
                if (dummy_column in new_data.columns
                        and new_data[dummy_column].iloc[position] == 1):
                    recommendations.append(f'{zone}: {advice}')
                    break
            else:
                # No zoning override applied to this record.
                recommendations.append(predicted_label)
        return recommendations

    # Load model and make predictions
    loaded_model = joblib.load('decision_tree_model.pkl')

    # Example data based on typical values from the dataset
    new_area_data = [
        {
            'UV_Index': merged_data['UV_Index'].mean(),
            'ZONING': 'Residential',
            'Avg_Tree_Height': merged_data['Avg_Tree_Height'].mean(),
            'Avg_Tree_Diameter': merged_data['Avg_Tree_Diameter'].mean(),
            'Walkable_Areas': 1,
            'Avg_Precipitation': merged_data['Avg_Precipitation'].mean(),
        },
        {
            'UV_Index': merged_data['UV_Index'].quantile(0.75),
            'ZONING': 'Commercial',
            'Avg_Tree_Height': merged_data['Avg_Tree_Height'].quantile(0.75),
            'Avg_Tree_Diameter': merged_data['Avg_Tree_Diameter'].quantile(0.75),
            'Walkable_Areas': 0,
            'Avg_Precipitation': merged_data['Avg_Precipitation'].quantile(0.75),
        },
        {
            'UV_Index': merged_data['UV_Index'].quantile(0.25),
            'ZONING': 'Industrial',
            'Avg_Tree_Height': 0.0,
            'Avg_Tree_Diameter': 0.0,
            'Walkable_Areas': 1,
            'Avg_Precipitation': merged_data['Avg_Precipitation'].quantile(0.25),
        },
    ]
    predictions = predict_infrastructure(
        new_area_data, loaded_model, X_train.columns)

    # Print predictions with zoning type and detailed recommendations.
    # The zoning types are read from the example records so the two can
    # never drift apart.
    zoning_types = [record['ZONING'] for record in new_area_data]
    for zone, pred in zip(zoning_types, predictions):
        advice = zoning_recommendations.get(zone)
        if advice is not None:
            print(
                f'Predicted Heat Mitigating Infrastructure for {zone} area: {pred} - Recommendations: {advice}')
else:
    print("UV Index data is not available. Model training and prediction cannot proceed.")