|
3 | 3 | # (c) Technische Universität Berlin, innoCampus <info@isis.tu-berlin.de> |
4 | 4 |
|
5 | 5 | import logging |
| 6 | +import re |
6 | 7 | from asyncio import to_thread |
7 | 8 | from gzip import decompress |
8 | | -from urllib.parse import urljoin |
| 9 | +from urllib.parse import urljoin, urlparse |
9 | 10 |
|
10 | 11 | from questionpy_server.cache import CacheItemTooLargeError, FileCache |
11 | 12 | from questionpy_server.repository.helper import download |
12 | 13 | from questionpy_server.repository.models import RepoMeta, RepoPackage, RepoPackageIndex |
13 | 14 | from questionpy_server.utils.logger import URLAdapter |
14 | 15 |
|
| 16 | +_SCHEME_AND_AUTH_PATTERN = re.compile(r"^https?://(?:[^/]+@)?") |
| 17 | +_FILENAME_SPECIAL_CHARACTERS_PATTERN = re.compile(r"[/\\?%*:|\"<>,;=\s]+") |
| 18 | + |
| 19 | +def _url_to_safe_path_part(url: str) -> str: |
| 20 | + return _FILENAME_SPECIAL_CHARACTERS_PATTERN.sub("-", _SCHEME_AND_AUTH_PATTERN.sub("", url)) |
15 | 21 |
|
16 | 22 | class Repository: |
def __init__(self, url: str, cache: FileCache):
    """Sets up the URLs, cache key, cache and logger for a single repository.

    Args:
        url: Base URL of the repository. The index and metadata URLs are resolved relative to it.
        cache: File cache stored on the instance.
    """
    self._url_base = url
    # urljoin replaces the last path segment of the base unless the base ends with "/".
    self._url_index = urljoin(self._url_base, "PACKAGES.json.gz")
    self._url_meta = urljoin(self._url_base, "META.json")

    # File-name-safe form of the URL; presumably used to namespace this repository's
    # cache entries (the usage is not visible in this section).
    self._cache_key = _url_to_safe_path_part(url)

    self._cache = cache

    # Attach the repository URL to every log record emitted through this adapter.
    logger = logging.getLogger("questionpy-server:repository")
    self._log = URLAdapter(logger, {"url": self._url_base})
26 | 34 |
|
async def get_meta(self) -> RepoMeta:
    """Downloads the repository metadata and parses it.

    The downloaded data is not verified yet (see the TODO below); it is only validated against the `RepoMeta` model.

    Returns:
        The parsed repository metadata.
    """
    meta = await download(self._url_meta)
    # TODO: verify downloaded data
    return RepoMeta.model_validate_json(meta)
36 | 40 |
|
37 | 41 | async def get_packages(self, meta: RepoMeta) -> dict[str, RepoPackage]: |
38 | 42 | """Downloads and verifies package index. |
39 | 43 |
|
40 | | - Args: |
41 | | - meta (RepoMeta): Metadata |
42 | | -
|
43 | 44 | Returns: |
44 | | - dict[str, RepoPackage]: package index, where keys are package hashes |
| 45 | + Package index, where keys are package hashes. |
45 | 46 | """ |
46 | 47 | try: |
47 | 48 | # Try to get the index from cache. |
@@ -71,11 +72,8 @@ async def get_packages(self, meta: RepoMeta) -> dict[str, RepoPackage]: |
async def get_package(self, package: RepoPackage) -> bytes:
    """Download a specific package from the repository.

    The package URL is its `path` resolved against the repository base URL. The package's `size` and `sha256` are
    passed on to the download helper as the size and expected hash.

    Args:
        package: Index entry of the package to download.

    Returns:
        Raw package bytes.
    """
    url = urljoin(self._url_base, package.path)
    return await download(url, size=package.size, expected_hash=package.sha256)
0 commit comments