-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataAnalysis.py
More file actions
120 lines (90 loc) · 3.04 KB
/
DataAnalysis.py
File metadata and controls
120 lines (90 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -*- coding: utf-8 -*-
"""kovai.co.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1kMXmun1-mUu2QX5GGkLSdOlzV46o41BG
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the boardings-by-ticket-type CSV export from the Colab runtime path.
data = pd.read_csv(
    "/content/Daily_Public_Transport_Passenger_Boardings_By_Ticket_Type_20240513.csv"
)
data  # notebook cell echo; evaluates the frame but prints nothing in a script

# Histogram (20 bins) of each of the two ticket columns, one figure apiece.
data['Paper Ticket'].plot.hist(bins=20, title='Paper Ticket')
plt.show()
data['MyWay'].plot.hist(bins=20, title='MyWay')
plt.show()
# Parse the day-first date strings (dd-mm-YYYY) into datetime64 values.
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')

# Keep 1 April 2023 through 30 April 2024, inclusive on both ends.
# The bounds are explicit Timestamps on purpose: when a datetime column is
# compared with a bare string, pandas parses '01-04-2023' month-first
# (4 January 2023) whereas '30-04-2024' can only be read day-first, so string
# bounds written in dd-mm-YYYY were interpreted inconsistently and the window
# started roughly three months too early.
start_date = pd.Timestamp(year=2023, month=4, day=1)
end_date = pd.Timestamp(year=2024, month=4, day=30)
filtered_df = data.loc[(data['Date'] >= start_date) & (data['Date'] <= end_date)]

# Notebook-style sanity checks of the selected window; these bare expressions
# only display something when run cell-by-cell in a notebook.
filtered_df
filtered_df['Date'].min()
filtered_df['Date'].max()
filtered_df.shape
import seaborn as sns
# Work on an independent copy of the filtered rows. filtered_df is a selection
# taken from `data`, and adding columns to it directly makes pandas emit
# SettingWithCopyWarning because it cannot tell whether the parent frame was
# meant to be modified. filtered_df itself is left without the helper columns.
data2 = filtered_df.copy()

# Calendar month number and year of each row, used as grouping keys by the
# plots that follow.
data2['month'] = data2['Date'].dt.month
data2['year'] = data2['Date'].dt.year
# Seasonal view of MyWay: one line of the MyWay values against the month number.
plt.figure(figsize=(7, 5))
# Both x and y are read from data2 so they describe the same filtered rows
# (y previously came from the unfiltered `data` frame). errorbar=None is the
# current spelling of the deprecated ci=None and suppresses the error band.
sns.lineplot(x=data2['month'], y=data2['MyWay'], errorbar=None)
plt.xlabel('Month')
plt.ylabel('MyWay')
plt.title('Seasonal Plot')
# Show month abbreviations instead of the numeric month values 1-12.
plt.xticks(
    range(1, 13),
    labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)
plt.grid(True)
plt.show()
# Seasonal view of Paper Ticket: the Paper Ticket values against the month number.
plt.figure(figsize=(7, 5))
# Both x and y are read from data2 so they describe the same filtered rows
# (y previously came from the unfiltered `data` frame). errorbar=None is the
# current spelling of the deprecated ci=None and suppresses the error band.
sns.lineplot(x=data2['month'], y=data2['Paper Ticket'], errorbar=None)
plt.xlabel('Month')
# The y-axis shows the Paper Ticket column; it was previously mislabelled 'MyWay'.
plt.ylabel('Paper Ticket')
plt.title('Seasonal Plot')
# Show month abbreviations instead of the numeric month values 1-12.
plt.xticks(
    range(1, 13),
    labels=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)
plt.grid(True)
plt.show()
# Box plot of MyWay grouped by month number.
plt.figure(figsize=(10, 6))
sns.boxplot(data=data2, x='month', y='MyWay')
plt.xlabel('month')
plt.show()

# Box plot of MyWay grouped by year.
plt.figure(figsize=(10, 6))
sns.boxplot(data=data2, x='year', y='MyWay')
plt.xlabel('year')
plt.show()
# Histogram of the Paper Ticket column with a kernel-density curve overlaid.
# NOTE(review): this reads the full `data` frame, not the filtered data2 --
# confirm that is intended.
plt.figure(figsize=(7, 5))
sns.histplot(data['Paper Ticket'], kde=True).set(
    xlabel='PaperTicket',
    ylabel='Frequency',
    title='Histogram and Density Plot',
)
plt.grid(False)
plt.show()
# Box plot of Paper Ticket grouped by year.
plt.figure(figsize=(10, 6))
sns.boxplot(data=data2, x='year', y='Paper Ticket')
plt.show()

# Box plot of Paper Ticket grouped by month number.
plt.figure(figsize=(10, 6))
sns.boxplot(data=data2, x='month', y='Paper Ticket')
plt.xlabel('month')
plt.show()
# Draw both ticket columns as lines against the month column on one chart.
# NOTE(review): rows are plotted as-is (no per-month aggregation), so rows
# sharing a month number share an x position -- confirm this is the intended view.
data2.plot.line(x="month", y=["MyWay", "Paper Ticket"])
plt.show()
# Column totals over data2 for the two ticket types, paired with bar labels.
ticket_types = ['MyWay', 'Paper Tickets']
total_myway = data2['MyWay'].sum()
total_paper_tickets = data2['Paper Ticket'].sum()
ticket_counts = [total_myway, total_paper_tickets]

# Side-by-side bars comparing the two totals.
plt.figure(figsize=(8, 5))
plt.bar(ticket_types, ticket_counts, color=['blue', 'green'])
plt.gca().set(
    xlabel='Ticket Types',
    ylabel='Total Number of Tickets Sold',
    title='Total Usage of MyWay vs Paper Tickets',
)
plt.show()
# Label each row with the name of its weekday.
data2['day_of_week'] = data2['Date'].dt.day_name()

# Mean of each ticket column per weekday. groupby sorts its keys
# alphabetically (Friday, Monday, ...), so reindex to calendar order to make
# the chart read Monday through Sunday.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
average_sales = (
    data2.groupby('day_of_week')[['MyWay', 'Paper Ticket']]
    .mean()
    .reindex(weekday_order)
)

# DataFrame.plot opens its own figure, so the size is passed via figsize here;
# a separate plt.figure(...) call beforehand would be left behind as an empty
# figure and its size would not apply to the bar chart.
average_sales.plot(kind='bar', color=['blue', 'green'], figsize=(10, 6))
# Adding labels and title
plt.xlabel('Day of the Week')
plt.ylabel('Average Number of Tickets Sold')
plt.title('Average Ticket Sales by Day of the Week')
# Display the plot
plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()