Skip to content
This repository was archived by the owner on Jun 29, 2019. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions CATelcoCustomerChurnModeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@
from azureml.logging import get_azureml_logger

# initialize the logger
run_logger = get_azureml_logger()
run_logger.log('amlrealworld.ChurnPrediction.CATelcoCustomerChurnModeling','true')
run_logger = get_azureml_logger()
run_logger.log(
'amlrealworld.ChurnPrediction.CATelcoCustomerChurnModeling', 'true')

with Package.open_package('CATelcoCustomerChurnTrainingSample.dprep') as pkg:
df = pkg.dataflows[0].get_dataframe(spark=False)

columns_to_encode = list(df.select_dtypes(include=['category','object']))
columns_to_encode = list(df.select_dtypes(include=['category', 'object']))
for column_to_encode in columns_to_encode:
dummies = pd.get_dummies(df[column_to_encode])
one_hot_col_names = []
Expand All @@ -35,7 +36,7 @@
model = GaussianNB()

random_seed = 42
train, test = train_test_split(df, random_state = random_seed, test_size = 0.3)
train, test = train_test_split(df, random_state=random_seed, test_size=0.3)

# Pull the 'churn' label out of the training frame before fitting.
target = train['churn'].values
# axis is passed by keyword: the positional form drop(label, 1) was
# deprecated in pandas 1.3 and is rejected by pandas 2.0+.
train = train.drop('churn', axis=1)
Expand All @@ -45,21 +46,22 @@
# Keep the true 'churn' labels for scoring, then remove them from the features.
expected = test['churn'].values
# axis is passed by keyword: the positional form drop(label, 1) was
# deprecated in pandas 1.3 and is rejected by pandas 2.0+.
test = test.drop('churn', axis=1)
predicted = model.predict(test)
print("Naive Bayes Classification Accuracy", accuracy_score(expected, predicted))
print("Naive Bayes Classification Accuracy",
accuracy_score(expected, predicted))
# log the Naive Bayes Accuracy
run_logger.log("Naive Bayes Accuracy", accuracy_score(expected, predicted))

dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt.fit(train, target)
predicted = dt.predict(test)
print("Decision Tree Classification Accuracy", accuracy_score(expected, predicted))
print("Decision Tree Classification Accuracy",
accuracy_score(expected, predicted))
# log the DTree Accuracy
run_logger.log("DTree Accuracy", accuracy_score(expected, predicted))

# serialize the model on disk in the special 'outputs' folder

print ("Export the model to model.pkl")
print("Export the model to model.pkl")
# Write the fitted decision tree to outputs/model.pkl. The context manager
# closes the file handle even if pickling raises.
with open('./outputs/model.pkl', 'wb') as f:
    pickle.dump(dt, f)

17 changes: 10 additions & 7 deletions CATelcoCustomerChurnModelingWithoutDprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
from azureml.logging import get_azureml_logger

# initialize the logger
run_logger = get_azureml_logger()
run_logger.log('amlrealworld.ChurnPrediction.CATelcoCustomerChurnModelingWithoutDprep','true')
run_logger = get_azureml_logger()
run_logger.log(
'amlrealworld.ChurnPrediction.CATelcoCustomerChurnModelingWithoutDprep', 'true')

# Perform Data Preparation
df = pd.read_csv('data/CATelcoCustomerChurnTrainingSample.csv')
Expand All @@ -24,7 +25,7 @@
# Remove the 'month' column. axis is passed by keyword because the positional
# form drop(label, 1) was deprecated in pandas 1.3 and is rejected by 2.0+.
df = df.drop('month', axis=1)

# One-Hot Encoding
columns_to_encode = list(df.select_dtypes(include=['category','object']))
columns_to_encode = list(df.select_dtypes(include=['category', 'object']))
for column_to_encode in columns_to_encode:
dummies = pd.get_dummies(df[column_to_encode])
one_hot_col_names = []
Expand All @@ -37,7 +38,7 @@
model = GaussianNB()

random_seed = 42
train, test = train_test_split(df, random_state = random_seed, test_size = 0.3)
train, test = train_test_split(df, random_state=random_seed, test_size=0.3)

# Pull the 'churn' label out of the training frame before fitting.
target = train['churn'].values
# axis is passed by keyword: the positional form drop(label, 1) was
# deprecated in pandas 1.3 and is rejected by pandas 2.0+.
train = train.drop('churn', axis=1)
Expand All @@ -48,19 +49,21 @@
# Keep the true 'churn' labels for scoring, then remove them from the features.
expected = test['churn'].values
# axis is passed by keyword: the positional form drop(label, 1) was
# deprecated in pandas 1.3 and is rejected by pandas 2.0+.
test = test.drop('churn', axis=1)
predicted = model.predict(test)
print("Naive Bayes Classification Accuracy", accuracy_score(expected, predicted))
print("Naive Bayes Classification Accuracy",
accuracy_score(expected, predicted))
# Log the Naive Bayes accuracy
run_logger.log("Naive Bayes Accuracy", accuracy_score(expected, predicted))

dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
dt.fit(train, target)
predicted = dt.predict(test)
print("Decision Tree Classification Accuracy", accuracy_score(expected, predicted))
print("Decision Tree Classification Accuracy",
accuracy_score(expected, predicted))
# log the DTree Accuracy
run_logger.log("DTree Accuracy", accuracy_score(expected, predicted))

# serialize the model on disk in the special 'outputs' folder
print ("Export the model to outputs/model.pkl")
print("Export the model to outputs/model.pkl")
# Write the model to outputs/model.pkl. The context manager closes the file
# handle even if pickling raises.
# NOTE(review): this pickles `model` (the GaussianNB instance), while the
# decision tree `dt` is the classifier trained last -- confirm which one is
# meant to be exported.
with open('./outputs/model.pkl', 'wb') as f:
    pickle.dump(model, f)
Loading