Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions docs/wbc_statcast_search.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# WBC Statcast Search

## `wbc_statcast_search`

Function to search for WBC Statcast pitch-level data with custom filters based on Baseball Savant's [WBC Statcast Search](https://baseballsavant.mlb.com/statcast-search-world-baseball-classic).

**Note:** If the search range is too wide, the response time can be very long.

**WBC data availability**

> From Baseball Savant:
> World Baseball Classic pitch-level Statcast data is available beginning with the 2023 tournament. Bat tracking data will additionally be available beginning with the 2026 tournament.

**Examples**

```python
from baseball_stats_python import wbc_statcast_search

# Get all pitch data in 2023 WBC
wbc_statcast_search(
season="2023"
)

# Get all pitch data in 2026 Pool Play
wbc_statcast_search(
game_type="F"
)
```

**Arguments**

| Argument | Data Type | Description | Default |
| --------------- | -------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- |
| season | `str` or `list[str]` | The season(s) to search for. | Current season |
| player_type | `str` | Player type for search result. Currently only supports `pitcher` and `batter`. | "pitcher" |
| game_type | `str` or `WbcGameType` or `list[str or WbcGameType]` | Game type (`F`, `CL`, `CD`, `CW`). Also supports `all` to select all options. See the [WbcGameType](../src/baseball_stats_python/enums/wbc.py) enum. | `R` |
| pitchers_lookup | `str` or `list[str]` | MLBAM ID(s) of the pitcher(s). MLBAM IDs can be obtained from Savant's WBC gameday. | "" |
| batters_lookup | `str` or `list[str]` | MLBAM ID(s) of the batter(s). MLBAM IDs can be obtained from Savant's WBC gameday. | "" |
| debug | `bool` | Whether to print debug information | False |

**Use Enums**

```python
from baseball_stats_python.enums.wbc import WbcGameType

# Get Semi-Finals data
wbc_statcast_search(
game_type=WbcGameType.SEMI_FINALS
)

```

**Return**

A DataFrame whose columns are described in Baseball Savant's [CSV Docs](https://baseballsavant.mlb.com/csv-docs).

## `wbc_statcast_pitcher_search`

Based on `wbc_statcast_search`, but only returns pitcher data.

**Examples**

```python
from baseball_stats_python import wbc_statcast_pitcher_search

# Get all pitch data of a specific pitcher
wbc_statcast_pitcher_search(
pitchers_lookup="830717"
)
```

**Arguments**

Same as `wbc_statcast_search`, but only the `pitchers_lookup` filter can be used. If `pitchers_lookup` is not provided, an error is raised.

## `wbc_statcast_batter_search`

Based on `wbc_statcast_search`, but only returns pitches that the target batter faced.

**Examples**

```python
from baseball_stats_python import wbc_statcast_batter_search

# Get all pitch data of a specific batter
wbc_statcast_batter_search(
batters_lookup="838360"
)
```

**Arguments**

Same as `wbc_statcast_search`, but only the `batters_lookup` filter can be used. If `batters_lookup` is not provided, an error is raised.
27 changes: 18 additions & 9 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
minor_statcast_search,
mlbam_id_search,
statcast_search,
wbc_statcast_search,
)
from src.baseball_stats_python.enums.minor import MinorGameType
from src.baseball_stats_python.enums.statcast import GameType, MlbTeam, Month


def example():
df = statcast_search(
season='2023',
pitchers_lookup='477132',
game_type=[GameType.PLAYOFFS, 'R'],
season="2023",
pitchers_lookup="477132",
game_type=[GameType.PLAYOFFS, "R"],
opponent=MlbTeam.PADRES,
month=Month.JUNE,
)
Expand All @@ -22,22 +23,29 @@ def example():

def minor_example():
df = minor_statcast_search(
season='2023', game_type=MinorGameType.REGULAR_SEASON, pitchers_lookup='678906'
season="2023", game_type=MinorGameType.REGULAR_SEASON, pitchers_lookup="678906"
)
print(df)


def mlbam_id_example():
df = mlbam_id_search('Reynolds')
df = mlbam_id_search("Lin")
print(df)


def spring_training_example():
df = statcast_search(
season='2025',
start_dt='2025-02-20',
end_dt='2025-02-20',
game_type='S',
season="2025",
start_dt="2025-02-20",
end_dt="2025-02-20",
game_type="S",
)
print(df)


def wbc_example():
    """Run a WBC Statcast search filtered by batter ID 838360 and print the result."""
    batter_pitches = wbc_statcast_search(batters_lookup="838360")
    print(batter_pitches)

Expand All @@ -46,3 +54,4 @@ def spring_training_example():
# minor_example()
# mlbam_id_example()
# spring_training_example()
# wbc_example()
28 changes: 18 additions & 10 deletions src/baseball_stats_python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,24 @@
statcast_pitcher_search,
statcast_search,
)
from .statcast.wbc_statcast_search import (
wbc_statcast_batter_search,
wbc_statcast_pitcher_search,
wbc_statcast_search,
)

__all__ = [
'statcast_search',
'statcast_pitcher_search',
'statcast_batter_search',
'minor_statcast_search',
'minor_statcast_pitcher_search',
'minor_statcast_batter_search',
'mlbam_id_search',
'catcher_throwing',
'runner_basestealing',
'runner_extra_bases_taken',
"statcast_search",
"statcast_pitcher_search",
"statcast_batter_search",
"minor_statcast_search",
"minor_statcast_pitcher_search",
"minor_statcast_batter_search",
"mlbam_id_search",
"catcher_throwing",
"runner_basestealing",
"runner_extra_bases_taken",
"wbc_statcast_search",
"wbc_statcast_pitcher_search",
"wbc_statcast_batter_search",
]
2 changes: 1 addition & 1 deletion src/baseball_stats_python/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
DEFAULT_SEASON = 2024
DEFAULT_SEASON = 2026
18 changes: 18 additions & 0 deletions src/baseball_stats_python/enums/wbc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from .enum_base import EnumBase


class WbcGameType(EnumBase):
    """
    Enum for WBC Game Types.

    Currently WBC Statcast Search only supports Pool Play, Semi-Finals,
    Quarter-Finals, and Championship.

    Members and their string values:
        POOL_PLAY = "F"
        SEMI_FINALS = "CL"
        QUARTER_FINALS = "CD"
        CHAMPIONSHIP = "CW"
    """

    POOL_PLAY = "F"
    SEMI_FINALS = "CL"
    QUARTER_FINALS = "CD"
    CHAMPIONSHIP = "CW"
115 changes: 58 additions & 57 deletions src/baseball_stats_python/statcast/statcast_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,24 @@
from ..utils.utils import validate_date_range

logging.basicConfig()
logger = logging.getLogger('Statcast')
logger = logging.getLogger("Statcast")

session = requests.Session()

STATCAST_SEARCH_URL = 'https://baseballsavant.mlb.com/statcast_search/csv'
STATCAST_SEARCH_URL = "https://baseballsavant.mlb.com/statcast_search/csv"


def statcast_search(
season: str | list[str] = '2024',
player_type: str = 'pitcher',
season: str | list[str] = "2024",
player_type: str = "pitcher",
game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON,
start_dt: str = '',
end_dt: str = '',
month: str | Month | list[str | Month] = '',
pitchers_lookup: str | list[str] = '',
batters_lookup: str | list[str] = '',
team: str | MlbTeam | list[str | MlbTeam] = '',
opponent: str | MlbTeam | list[str | MlbTeam] = '',
start_dt: str = "",
end_dt: str = "",
month: str | Month | list[str | Month] = "",
pitchers_lookup: str | list[str] = "",
batters_lookup: str | list[str] = "",
team: str | MlbTeam | list[str | MlbTeam] = "",
opponent: str | MlbTeam | list[str | MlbTeam] = "",
debug: bool = False,
) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -60,49 +60,50 @@ def statcast_search(
validate_date_range(start_dt, end_dt)

params = {
'all': 'true',
'player_type': player_type,
'hfSea': get_season_param_str(season),
'hfGT': get_game_type_param_str(game_type),
'game_date_gt': start_dt,
'game_date_lt': end_dt,
'hfMo': get_month_param_str(month),
'hfTeam': get_team_param_str(team),
'hfOpponent': get_team_param_str(opponent),
'type': 'details',
"all": "true",
"player_type": player_type,
"hfSea": get_season_param_str(season),
"hfGT": get_game_type_param_str(game_type),
"game_date_gt": start_dt,
"game_date_lt": end_dt,
"hfMo": get_month_param_str(month),
"hfTeam": get_team_param_str(team),
"hfOpponent": get_team_param_str(opponent),
"type": "details",
}

if pitchers_lookup:
params['pitchers_lookup[]'] = pitchers_lookup
params["pitchers_lookup[]"] = pitchers_lookup

if batters_lookup:
params['batters_lookup[]'] = batters_lookup
params["batters_lookup[]"] = batters_lookup

print('Starting Statcast Search')
logger.debug(f'Params: {params}')
print("Starting Statcast Search")
logger.debug(f"Params: {params}")
response = session.get(STATCAST_SEARCH_URL, params=params)

logger.debug(response.url)
print(response.url)

if response.status_code == 200:
print('Statcast Search Completed')
print("Statcast Search Completed")
csv_content = io.StringIO(response.text)

return pd.read_csv(csv_content)
else:
raise Exception(
f'Failed to fetch data: {response.status_code} - {response.text}'
f"Failed to fetch data: {response.status_code} - {response.text}"
)


def statcast_pitcher_search(
pitchers_lookup: str | list[str],
season: str | list[str] = '2024',
season: str | list[str] = "2024",
game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON,
start_dt: str = '',
end_dt: str = '',
month: str | Month | list[str | Month] = '',
opponent: str | MlbTeam | list[str | MlbTeam] = '',
start_dt: str = "",
end_dt: str = "",
month: str | Month | list[str | Month] = "",
opponent: str | MlbTeam | list[str | MlbTeam] = "",
debug: bool = False,
) -> pd.DataFrame:
"""
Expand All @@ -122,31 +123,31 @@ def statcast_pitcher_search(
"""

if not pitchers_lookup:
raise ValueError('pitchers_lookup is required')
raise ValueError("pitchers_lookup is required")

params = {
'pitchers_lookup': pitchers_lookup,
'season': season,
'player_type': 'pitcher',
'game_type': game_type,
'start_dt': start_dt,
'end_dt': end_dt,
'month': month,
'opponent': opponent,
'debug': debug,
"pitchers_lookup": pitchers_lookup,
"season": season,
"player_type": "pitcher",
"game_type": game_type,
"start_dt": start_dt,
"end_dt": end_dt,
"month": month,
"opponent": opponent,
"debug": debug,
}

return statcast_search(**params)


def statcast_batter_search(
batters_lookup: str | list[str],
season: str | list[str] = '2024',
season: str | list[str] = "2024",
game_type: str | GameType | list[str | GameType] = GameType.REGULAR_SEASON,
start_dt: str = '',
end_dt: str = '',
month: str | Month | list[str | Month] = '',
opponent: str | MlbTeam | list[str | MlbTeam] = '',
start_dt: str = "",
end_dt: str = "",
month: str | Month | list[str | Month] = "",
opponent: str | MlbTeam | list[str | MlbTeam] = "",
debug: bool = False,
) -> pd.DataFrame:
"""
Expand All @@ -166,18 +167,18 @@ def statcast_batter_search(
"""

if not batters_lookup:
raise ValueError('batters_lookup is required')
raise ValueError("batters_lookup is required")

params = {
'batters_lookup': batters_lookup,
'season': season,
'player_type': 'batter',
'game_type': game_type,
'start_dt': start_dt,
'end_dt': end_dt,
'month': month,
'opponent': opponent,
'debug': debug,
"batters_lookup": batters_lookup,
"season": season,
"player_type": "batter",
"game_type": game_type,
"start_dt": start_dt,
"end_dt": end_dt,
"month": month,
"opponent": opponent,
"debug": debug,
}

return statcast_search(**params)
Loading