Skip to content

Commit dc5ae89

Browse files
author
Marc Damie
committed
Add a light dataset option to reduce disk space usage
1 parent f85d22b commit dc5ae89

1 file changed

Lines changed: 12 additions & 4 deletions

File tree

fedivertex/main.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,12 @@ class GraphLoader:
2525
}
2626
UNDIRECTED_GRAPHS = ["federation"]
2727

28-
def __init__(
29-
self,
30-
url="https://www.kaggle.com/datasets/marcdamie/fediverse-graph-dataset/croissant/download",
31-
):
28+
def __init__(self, light_version=True):
29+
self.light_version = light_version
30+
if self.light_version:
31+
url = "https://www.kaggle.com/datasets/marcdamie/fediverse-graph-dataset-reduced/croissant/download"
32+
else:
33+
url = "https://www.kaggle.com/datasets/marcdamie/fediverse-graph-dataset/croissant/download"
3234
try:
3335
self.dataset = mlc.Dataset(jsonld=url)
3436
except json.JSONDecodeError as err:
@@ -58,6 +60,12 @@ def _check_input(self, software: str, graph_type: str) -> NoneType:
5860
f"{graph_type} is not a valid graph type for {software}. Valid types: {self.VALID_GRAPH_TYPES[software]}"
5961
)
6062

63+
if self.light_version and software == "mastodon" and graph_type == "federation":
64+
raise ValueError(
65+
f"The graph {software} {graph_type} is not included in the light version of Fedivertex\n"
66+
"To download the full version, generate the dataset loader as follows: `GraphLoader(light_version=False)`"
67+
)
68+
6169
def _fetch_date_index(self, software: str, graph_type: str, index: int) -> str:
6270
"""Returns the i-th date available for a given graph type.
6371
The dates are sorted increasingly.

0 commit comments

Comments
 (0)