Skip to content

Commit a64620b

Browse files
Merge pull request #37 from cuappdev/revert-duplicate-games-fix
Revert duplicate games fix
2 parents 721f0f8 + bbe2408 commit a64620b

File tree

6 files changed

+13
-249
lines changed

6 files changed

+13
-249
lines changed

src/database.py

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def keep_connection_alive():
4949
# Access the database
5050
db = client[os.getenv("MONGO_DB", "score_db")]
5151

52+
5253
def setup_database_indexes():
5354
"""Set up MongoDB indexes for optimal query performance"""
5455
try:
@@ -64,31 +65,6 @@ def setup_database_indexes():
6465

6566
# Index for sorting operations
6667
game_collection.create_index([("date", -1)], background=True)
67-
68-
# Index to have unique games so we won't add duplicates
69-
game_collection.create_index(
70-
[
71-
("sport", 1),
72-
("gender", 1),
73-
("date", 1),
74-
("opponent_id", 1),
75-
("state", 1),
76-
],
77-
unique=True,
78-
background=True
79-
)
80-
81-
# Additional index for tournament games (without opponent_id)
82-
game_collection.create_index(
83-
[
84-
("sport", 1),
85-
("gender", 1),
86-
("date", 1),
87-
("city", 1),
88-
("state", 1),
89-
],
90-
background=True
91-
)
9268

9369
print("✅ MongoDB indexes created successfully")
9470
except Exception as e:

src/repositories/game_repository.py

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -130,56 +130,6 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state):
130130

131131
return [Game.from_dict(game) for game in games]
132132

133-
@staticmethod
134-
def find_by_tournament_key_fields(city, date, gender, location, sport, state):
135-
"""
136-
Find tournament games by location and date (excluding opponent_id).
137-
This is used when we need to find a tournament game that might have a placeholder team.
138-
Uses flexible matching to handle TBD/TBA values.
139-
"""
140-
game_collection = db["game"]
141-
142-
# Build flexible query that can handle TBD/TBA values
143-
query = {
144-
"date": date,
145-
"gender": gender,
146-
"sport": sport,
147-
}
148-
149-
# For city, state, and location, use flexible matching
150-
# This allows finding games even when TBD/TBA values change to real values
151-
city_conditions = []
152-
if city:
153-
city_conditions.append(city)
154-
else:
155-
city_conditions = [None]
156-
157-
state_conditions = []
158-
if state:
159-
state_conditions.append(state)
160-
else:
161-
state_conditions = [None]
162-
163-
location_conditions = []
164-
if location:
165-
location_conditions.append(location)
166-
else:
167-
location_conditions = [None]
168-
169-
query["city"] = {"$in": city_conditions}
170-
query["state"] = {"$in": state_conditions}
171-
query["location"] = {"$in": location_conditions}
172-
173-
games = list(game_collection.find(query))
174-
175-
if not games:
176-
return None
177-
178-
if len(games) == 1:
179-
return Game.from_dict(games[0])
180-
181-
return [Game.from_dict(game) for game in games]
182-
183133
@staticmethod
184134
def find_by_sport(sport):
185135
"""
@@ -206,31 +156,3 @@ def find_by_sport_gender(sport, gender):
206156
game_collection = db["game"]
207157
games = game_collection.find({"sport": sport, "gender": gender})
208158
return [Game.from_dict(game) for game in games]
209-
210-
@staticmethod
211-
def find_games_by_sport_gender_after_date(sport, gender, after_date=None):
212-
"""
213-
Find games for a specific sport and gender, optionally after a specific date.
214-
This method returns raw game data without team information.
215-
"""
216-
game_collection = db["game"]
217-
218-
query = {
219-
"sport": sport,
220-
"gender": gender
221-
}
222-
223-
if after_date:
224-
query["utc_date"] = {"$gt": after_date}
225-
226-
games = game_collection.find(query)
227-
return [Game.from_dict(game) for game in games]
228-
229-
@staticmethod
230-
def delete_games_by_ids(game_ids):
231-
"""
232-
Delete games by their IDs.
233-
"""
234-
game_collection = db["game"]
235-
result = game_collection.delete_many({"_id": {"$in": game_ids}})
236-
return result.deleted_count

src/scrapers/games_scraper.py

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
from src.utils.convert_to_utc import convert_to_utc
55
from src.utils.constants import *
66
from src.scrapers.game_details_scrape import scrape_game
7-
from src.utils.helpers import get_dominant_color, normalize_game_data, is_tournament_placeholder_team, is_cornell_loss
7+
from src.utils.helpers import get_dominant_color
88
import base64
99
import re
10-
from src.database import db
10+
import html
1111
import threading
1212

1313

@@ -164,8 +164,6 @@ def process_game_data(game_data):
164164
Args:
165165
game_data (dict): A dictionary containing the game data.
166166
"""
167-
168-
game_data = normalize_game_data(game_data)
169167
location_data = game_data["location"].split("\n")
170168
geo_location = location_data[0]
171169
if (",") not in geo_location:
@@ -234,28 +232,16 @@ def process_game_data(game_data):
234232
if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final):
235233
game_data["score_breakdown"] = game_data["score_breakdown"][::-1]
236234

237-
# Try to find by tournament key fields to handle placeholder teams
238-
curr_game = GameService.get_game_by_tournament_key_fields(
235+
# finds any existing game with the same key fields regardless of time
236+
curr_game = GameService.get_game_by_key_fields(
239237
city,
240238
game_data["date"],
241239
game_data["gender"],
242240
location,
241+
team.id,
243242
game_data["sport"],
244243
state
245244
)
246-
247-
# If no tournament game found, try the regular lookup with opponent_id
248-
if not curr_game:
249-
curr_game = GameService.get_game_by_key_fields(
250-
city,
251-
game_data["date"],
252-
game_data["gender"],
253-
location,
254-
team.id,
255-
game_data["sport"],
256-
state
257-
)
258-
259245
if isinstance(curr_game, list):
260246
if curr_game:
261247
curr_game = curr_game[0]
@@ -267,19 +253,8 @@ def process_game_data(game_data):
267253
"result": game_data["result"],
268254
"box_score": game_data["box_score"],
269255
"score_breakdown": game_data["score_breakdown"],
270-
"utc_date": utc_date_str,
271-
"city": city,
272-
"location": location,
273-
"state": state
256+
"utc_date": utc_date_str
274257
}
275-
276-
current_team = TeamService.get_team_by_id(curr_game.opponent_id)
277-
if current_team and is_tournament_placeholder_team(current_team.name):
278-
updates["opponent_id"] = team.id
279-
280-
if is_cornell_loss(game_data["result"]) and game_data["utc_date"]:
281-
GameService.handle_tournament_loss(game_data["sport"], game_data["gender"], game_data["utc_date"])
282-
283258
GameService.update_game(curr_game.id, updates)
284259
return
285260

@@ -297,5 +272,5 @@ def process_game_data(game_data):
297272
"score_breakdown": game_data["score_breakdown"],
298273
"utc_date": utc_date_str
299274
}
300-
275+
301276
GameService.create_game(game_data)

src/services/game_service.py

Lines changed: 0 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from src.repositories.game_repository import GameRepository
22
from src.models.game import Game
33
from src.services.team_service import TeamService
4-
from src.utils.helpers import is_tournament_placeholder_team
54

65

76
class GameService:
@@ -34,7 +33,6 @@ def create_game(data):
3433
opponent_id = data.get("opponent_id")
3534
if not TeamService.get_team_by_id(opponent_id):
3635
raise ValueError(f"Opponent team with id {opponent_id} does not exist.")
37-
3836
game = Game(**data)
3937
GameRepository.insert(game)
4038
return game
@@ -71,16 +69,6 @@ def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, sta
7169
city, date, gender, location, opponent_id, sport, state
7270
)
7371

74-
@staticmethod
75-
def get_game_by_tournament_key_fields(city, date, gender, location, sport, state):
76-
"""
77-
Retrieve a tournament game by location and date (excluding opponent_id).
78-
This is used when we need to find a tournament game that might have a placeholder team.
79-
"""
80-
return GameRepository.find_by_tournament_key_fields(
81-
city, date, gender, location, sport, state
82-
)
83-
8472
@staticmethod
8573
def get_games_by_sport(sport):
8674
"""
@@ -101,50 +89,3 @@ def get_games_by_sport_gender(sport, gender):
10189
Retrieves all games by sport and gender.
10290
"""
10391
return GameRepository.find_by_sport_gender(sport, gender)
104-
105-
@staticmethod
106-
def get_tournament_games_by_sport_gender(sport, gender, after_date=None):
107-
"""
108-
Find tournament games (with placeholder team names) for a specific sport and gender.
109-
Optionally filter by games after a specific date.
110-
"""
111-
games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date)
112-
tournament_games = []
113-
114-
for game in games:
115-
team = TeamService.get_team_by_id(game.opponent_id)
116-
if team and is_tournament_placeholder_team(team.name):
117-
tournament_games.append(game)
118-
119-
return tournament_games
120-
121-
@staticmethod
122-
def delete_tournament_games_by_sport_gender(sport, gender, after_date=None):
123-
"""
124-
Delete tournament games (with placeholder team names) for a specific sport and gender.
125-
Optionally filter by games after a specific date.
126-
"""
127-
games = GameRepository.find_games_by_sport_gender_after_date(sport, gender, after_date)
128-
tournament_game_ids = []
129-
130-
for game in games:
131-
team = TeamService.get_team_by_id(game.opponent_id)
132-
if team and is_tournament_placeholder_team(team.name):
133-
tournament_game_ids.append(game.id)
134-
135-
if tournament_game_ids:
136-
return GameRepository.delete_games_by_ids(tournament_game_ids)
137-
return 0
138-
139-
@staticmethod
140-
def handle_tournament_loss(sport, gender, loss_date):
141-
"""
142-
Handle when a Cornell team loses in a tournament by deleting future tournament games.
143-
144-
Args:
145-
sport (str): The sport of the team that lost
146-
gender (str): The gender of the team that lost
147-
loss_date (datetime): The date when the team lost
148-
"""
149-
deleted_count = GameService.delete_tournament_games_by_sport_gender(sport, gender, loss_date)
150-
return deleted_count

src/services/team_service.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from src.repositories import TeamRepository
22
from src.models.team import Team
33

4+
45
class TeamService:
56
@staticmethod
67
def get_all_teams():
@@ -12,25 +13,14 @@ def get_all_teams():
1213
@staticmethod
1314
def create_team(team_data):
1415
"""
15-
Create a new team, or update it if it already exists.
16-
16+
Create a new team.
17+
1718
Args:
1819
team_data (dict): The data for the new team.
20+
1921
Returns:
2022
Team: The created team.
2123
"""
22-
name = team_data.get("name")
23-
if not name:
24-
raise ValueError("Team name is required to create a team.")
25-
26-
existing = TeamService.get_team_by_name(name)
27-
if existing:
28-
if isinstance(existing, list) and existing:
29-
existing = existing[0]
30-
31-
TeamService.update_team(existing.id, team_data)
32-
return existing
33-
3424
team = Team(**team_data)
3525
TeamRepository.insert(team)
3626
return team

src/utils/helpers.py

Lines changed: 1 addition & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -54,44 +54,4 @@ def get_dominant_color(image_url, white_threshold=200, black_threshold=50):
5454
return hex_color
5555
except Exception as e:
5656
logging.error(f"Error in get_dominant_color for {image_url}: {e}")
57-
return default_color
58-
59-
def normalize_game_data(data: dict):
60-
"""
61-
Normalize placeholder values like TBA/TBD into None.
62-
"""
63-
placeholders = {"TBA", "TBD", "tba", "tbd"}
64-
65-
for field in ["time", "city", "state"]:
66-
if data.get(field) in placeholders:
67-
data[field] = None
68-
69-
return data
70-
71-
def is_tournament_placeholder_team(team_name: str):
72-
"""
73-
Check if a team name is a tournament placeholder.
74-
"""
75-
76-
placeholder_team_names = [
77-
"First Round", "Second Round", "Third Round", "Quarterfinals",
78-
"College Cup Semifinals", "College Cup Championship Game",
79-
"ECAC Hockey First Round", "ECAC Hockey Quarterfinals",
80-
"ECAC Hockey Semifinals", "ECAC Hockey Championship Game",
81-
"Regional Semifinals", "Regional Championship", "National Semifinals",
82-
"TBD", "National Championship", "NCAA Wrestling Championships", "NCAA Northeast Regional CHampionships",
83-
"NCAA Cross Country Championships",
84-
]
85-
return team_name in placeholder_team_names
86-
87-
def is_cornell_loss(result: str):
88-
"""
89-
Check if the result indicates a Cornell loss.
90-
"""
91-
92-
if not result:
93-
return False
94-
95-
# Common loss indicators in result strings
96-
loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"]
97-
return any(indicator in result for indicator in loss_indicators)
57+
return default_color

0 commit comments

Comments
 (0)