Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion bats_ai/core/management/commands/load_public_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,23 @@ def _ingest_files_from_manifest(
owner: User,
public: bool,
limit: int | None,
offset: int | None,
file_key: str = "file_key",
tag_keys: list[str] | None = None,
):
if tag_keys is None:
tag_keys = []

if offset is None:
offset = 0

iterations = 0

with open(manifest) as manifest_file:
reader = DictReader(manifest_file)
for line in reader:
for idx, line in enumerate(reader):
if idx < offset:
continue
if limit and iterations >= limit:
return
iterations += 1
Expand Down Expand Up @@ -223,6 +229,9 @@ def add_arguments(self, parser):
type=int,
help="Limit the number of WAV files to be imported",
)
parser.add_argument(
"--offset", type=int, help="Begin ingest from the specified position in the manifest"
)
parser.add_argument(
"--filekey",
type=str,
Expand Down Expand Up @@ -265,7 +274,10 @@ def handle(self, *args, **options):

public = options.get("public", False)
limit = options.get("limit")
offset = options.get("offset")
file_key = options.get("filekey", "file_key")
if offset:
self.stdout.write(f"Skipping the first {offset} row(s)...")
if limit:
self.stdout.write(f"Ingesting the first {limit} files from {manifest}...")
_ingest_files_from_manifest(
Expand All @@ -275,6 +287,7 @@ def handle(self, *args, **options):
owner=owner,
public=public,
limit=limit,
offset=offset,
file_key=file_key,
tag_keys=tag_keys,
)