Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
FROM ipython/notebook

RUN pip install pandas
RUN pip install datetime

RUN mkdir /Docker_A3
WORKDIR /Docker_A3
RUN mkdir ./code_files ./data

COPY code_files/*.ipynb ./code_files/
COPY data/*.csv ./data

WORKDIR /Docker_A3/code_files
295 changes: 295 additions & 0 deletions code_files/Python_Assignment_02.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import datetime\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"hospitaldata = pd.read_csv(r'D:\\DIH\\Assignments\\FarooqUrRehman_KCI_Python_Assignment2\\hospitaldata.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 1\n",
"hospitaldata = hospitaldata.rename(columns=lambda x: x.replace('.', ''))\n",
"print(hospitaldata.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 2\n",
"hospitaldata['WeekDay'] = pd.to_datetime(hospitaldata['Date']).dt.weekday_name\n",
"WeekDay_Counts=hospitaldata[['WeekDay','id']].groupby('WeekDay').agg('count')\n",
"WeekDay_Max = WeekDay_Counts['id'].max()\n",
"print(WeekDay_Counts[(WeekDay_Counts['id'] == WeekDay_Max)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 3\n",
"\n",
"Average_Age = hospitaldata.dropna(subset =['Age'])\n",
"Average_Age = Average_Age[(Average_Age['Age']!='-')]\n",
"Average_Age['Age_Years']=Average_Age['Age'].str.extract('(\\d+)').astype(int)\n",
"Average_Age['Age_Years']=np.where(Average_Age['Age'].str.contains('M') == True, Average_Age['Age_Years']/12.0, Average_Age['Age_Years'])\n",
"print(Average_Age['Age_Years'].mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 4\n",
"Children_Count = Average_Age[(Average_Age['Age_Years'] >=1) & (Average_Age['Age_Years'] <=12)]\n",
"print(Children_Count['Age_Years'].count())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 5\n",
"Gender_Procedure = hospitaldata.dropna(subset =['Sex'])\n",
"Gender_Procedure = Gender_Procedure.dropna(subset =['Procedure'])\n",
"Gender_Procedure = Gender_Procedure[(Gender_Procedure['Sex']!='-')]\n",
"Gender_Procedure['Sex']=Gender_Procedure['Sex'].str.replace(\"m\",\"M\")\n",
"Gender_Procedure['Sex']=Gender_Procedure['Sex'].str.replace(\"f\",\"F\")\n",
"print(Gender_Procedure.groupby('Sex')['Procedure'].value_counts().nlargest(2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 6\n",
"Doctor_Earnings = hospitaldata[(hospitaldata['ConsultingDoctor'].str.contains(\"Dr\")==True)]\n",
"print(Doctor_Earnings.groupby(\"ConsultingDoctor\")['AmountReceived'].max().nlargest(1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 7\n",
"Procedure_Earnings=hospitaldata.dropna(subset =['Procedure'])\n",
"Procedure_Earnings=hospitaldata.dropna(subset =['AmountReceived'])\n",
"print(Procedure_Earnings.groupby(\"Procedure\")['AmountReceived'].sum().nlargest(1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 8\n",
"VF = hospitaldata.dropna(subset =['Time'])\n",
"VF = VF[(VF['Time']!='-')]\n",
"#Visit_Frequency['AM_PM']=np.where(Average_Age['Time'].str.contains('PM') == True, \"PM\",\"AM\")\n",
"VF['VisitHour']=pd.to_datetime(VF['Time'])\n",
"VF['VisitHour']=VF['VisitHour'].dt.hour\n",
"VF1=VF[['VisitHour','id']]\n",
"VF1.groupby('VisitHour')['id'].count().nlargest(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 9\n",
"VF['TimeBracket']= np.where((VF['VisitHour'] >=6) & (VF['VisitHour'] <12), \"Morning\",np.where((VF['VisitHour'] >=12) & (VF['VisitHour'] <16), \"Afternoon\",np.where((VF['VisitHour'] >=16) & (VF['VisitHour'] <19), \"Evening\",\"Night\")))\n",
"VF[['TimeBracket','VisitHour']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 10 & 11\n",
"VF2=VF[['id','Date']].groupby('id').count()\n",
"VF2[VF2['Date'] > 1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Answer 11\n",
"VF2=VF[['id','Date']].groupby('id').count()\n",
"VF2[VF2['Date'] > 1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 12\n",
"Gender_Procedure2=Gender_Procedure[['id','Procedure','Date']].groupby(['id','Procedure']).count()\n",
"Gender_Procedure2[Gender_Procedure2['Date']>1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 13\n",
"Average_Age['Sex']=Average_Age['Sex'].str.replace(\"m\",\"M\")\n",
"Average_Age['Sex']=Average_Age['Sex'].str.replace(\"f\",\"F\")\n",
"Average_Age.groupby(\"Sex\")['Age_Years'].mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 14\n",
"Gender_Procedure = Gender_Procedure.dropna(subset =['AmountBalance'])\n",
"Gender_Procedure = Gender_Procedure[(Gender_Procedure['AmountBalance']!=' - ')]\n",
"Gender_Procedure['AmountBalance1'] = Gender_Procedure['AmountBalance'].str.replace(',','')\n",
"Gender_Procedure['AmountBalance1'] = Gender_Procedure['AmountBalance1'].str.extract('(\\d+)').astype(float)\n",
"sum(Gender_Procedure['AmountBalance1'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 15\n",
"Consultation_Earnings=Procedure_Earnings[(Procedure_Earnings['Procedure'] == 'Consultation')]\n",
"print(sum(Consultation_Earnings['AmountReceived']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 16\n",
"Age_Charges_Cor = Average_Age.dropna(subset =['TotalCharges'])\n",
"Age_Charges_Cor = Age_Charges_Cor[(Age_Charges_Cor['TotalCharges']!='Cancelled')]\n",
"np.correlate(Age_Charges_Cor['Age_Years'],Age_Charges_Cor['TotalCharges'].str.extract('(\\d+)').astype(float))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 17\n",
"Average_Age['Age_Group']= np.where((Average_Age['Age_Years'] <=15), \"1-15 Yrs\",np.where((Average_Age['Age_Years'] >15) & (Average_Age['Age_Years'] <=30), \"16-30 Yrs\",np.where((Average_Age['Age_Years'] >30) & (Average_Age['Age_Years'] <=45), \"31-45 Yrs\",\"Above 45 Yrs\")))\n",
"print(Average_Age.groupby('Age_Group')['id'].count())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Answer 18\n",
"XRay_Scalling_Earnings=Procedure_Earnings[(Procedure_Earnings['Procedure'] == 'X Ray') | (Procedure_Earnings['Procedure'] == 'Scalling')]\n",
"print(sum(XRay_Scalling_Earnings['AmountReceived']))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading