-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrunnning_back_code
More file actions
174 lines (159 loc) · 4.03 KB
/
runnning_back_code
File metadata and controls
174 lines (159 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
-- inserting data into running_back from csv
COPY running_back
FROM
'C:\Users\bhamm\OneDrive\Documents\FFdb_raw\mySQL\RB.csv'
DELIMITER ','
CSV HEADER;
-- counting week, player (should have 614), games, and season rows
SELECT
COUNT(DISTINCT week),
COUNT(DISTINCT player),
COUNT(DISTINCT games),
COUNT(DISTINCT season)
FROM
running_back;
-- filtering out and deleting runningbacks with no targets or carries in 2023
-- players without 2023 stats add no value to the database
-- first I first count the number of players being dropped before dropping them
SELECT
player
FROM
running_back
WHERE
season = 2023
GROUP BY
player
HAVING
SUM(targets) = 0
AND SUM(carries) = 0;
-- the results of the above query shows 194 players that needed dropped
-- dropping players
DELETE FROM running_back
WHERE player IN (
SELECT
player
FROM
running_back
WHERE
season = 2023
GROUP BY
player
HAVING
SUM(targets) = 0
AND SUM(carries) = 0);
-- running the query below returns 420 players which is 194 less than the we started with
SELECT
COUNT(DISTINCT player)
FROM
running_back;
-- inserting a new field called position
-- this field will help filter data once we model the database
ALTER TABLE running_back
ADD COLUMN position varchar(3);
-- Assigning the position abbreviation to the new field
UPDATE running_back
SET position = 'RB';
-- trimming all whitespaces from text fields
SELECT
trim(player),
trim(team_code),
trim(position)
FROM
running_back;
-- checking for duplicates by cross referencing games played per season
-- creating a pivot table using the FILTER clause to display games played per season
-- max games played per season from 2023-2021 = 17 and from 2020-2018 = 16
-- players with more games played than that suggest errors in the data
SELECT
DISTINCT player,
COALESCE(SUM(games) FILTER(WHERE season = 2023),0) AS "2023",
COALESCE(SUM(games) FILTER(WHERE season = 2022),0) AS "2022",
COALESCE(SUM(games) FILTER(WHERE season = 2021),0) AS "2021",
COALESCE(SUM(games) FILTER(WHERE season = 2020),0) AS "2020",
COALESCE(SUM(games) FILTER(WHERE season = 2019),0) AS "2019",
COALESCE(SUM(games) FILTER(WHERE season = 2018),0) AS "2018"
FROM
running_back
GROUP BY
player
ORDER BY
"2020" DESC,
player;
-- checking Raheem Mostert who had over 16 games in 2019
SELECT *
FROM
running_back
WHERE
player = 'Raheem Mostert'
AND season = 2019
ORDER BY
week;
-- this shows dupicate values for week 3 & 4 also week 7 has no values
-- fantasydata.com shows he did not play in week 4 and week 7 was a bye
-- I will delete the week 4 record and change week 7 game to 0 from 1
DELETE FROM
running_back
WHERE
player = 'Raheem Mostert'
AND season = 2019
AND week = 4;
-- now updating the game for week 7 to 0 from 1
UPDATE
running_back
SET
games = 0
WHERE
player = 'Raheem Mostert'
AND season = 2019
AND week = 7;
-- Now using the same process to correct Ty Montgomery II
SELECT *
FROM
running_back
WHERE
player = 'Ty Montgomery II'
AND season = 2019
ORDER BY
week;
DELETE FROM
running_back
WHERE
player = 'Ty Montgomery II'
AND season = 2019
AND week = 4;
UPDATE
running_back
SET
games = 0
WHERE
player = 'Ty Montgomery II'
AND season = 2019
AND week = 7;
-- After using trim, duplicate player names still appear indicating white space still exists
-- Changing the year in the following WHERE clause to see if duplicate players appear for each year
SELECT
DISTINCT(player)
FROM
running_back
WHERE
season = 2023
ORDER BY
player;
-- Noticed duplicate names only appear in 2023
-- Will add fpts to the above query to identify the week with the duplicate names
SELECT
DISTINCT(player),
SUM(fpts)
FROM
running_back
WHERE
season = 2023
GROUP BY
player
ORDER BY
player;
-- after cross referencing fpts from the query above it appears player in week 1 has the extra whitespace
-- replacing the player value of week 1 with the correct value of player from week 2
UPDATE running_back
SET player = 'Zamir White'
WHERE player ILIKE '%zamir%' AND season = 2023 AND position = 'RB'