Skip to content

Commit 476f26b

Browse files
committed
Add PURL_TYPES and enforce validation of PackageURL types
Closes #155 and #181
1 parent c7c7b46 commit 476f26b

9 files changed

Lines changed: 91 additions & 225 deletions

File tree

CHANGELOG.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ Changelog
44
0.18.0 (unreleased)
55
--------------------
66

7+
- Add ``PURL_TYPES`` constant sourced from the ``purl-types-index.json`` registry.
8+
Enforce validation of PackageURL types.
9+
Remove ``sourceforge`` and ``gitlab`` support from ``url2purl`` and ``purl2url``.
10+
Remove ``rubygems`` route alias from ``purl2url`` (the registered type is ``gem``).
11+
https://github.com/package-url/packageurl-python/issues/181
12+
https://github.com/package-url/packageurl-python/issues/155
13+
https://github.com/package-url/packageurl-python/pull/188
14+
715
- Add support for Python 3.14
816

917
- Drop support for Python 3.8

src/packageurl/__init__.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,56 @@
6161
"""
6262

6363

64+
PURL_TYPES: set[str] = {
65+
"alpm",
66+
"apk",
67+
"bazel",
68+
"bitbucket",
69+
"bitnami",
70+
"cargo",
71+
"chrome-extension",
72+
"cocoapods",
73+
"composer",
74+
"conan",
75+
"conda",
76+
"cpan",
77+
"cran",
78+
"deb",
79+
"docker",
80+
"gem",
81+
"generic",
82+
"github",
83+
"golang",
84+
"hackage",
85+
"hex",
86+
"huggingface",
87+
"julia",
88+
"luarocks",
89+
"maven",
90+
"mlflow",
91+
"npm",
92+
"nuget",
93+
"oci",
94+
"opam",
95+
"otp",
96+
"pub",
97+
"pypi",
98+
"qpkg",
99+
"rpm",
100+
"swid",
101+
"swift",
102+
"vscode-extension",
103+
"yocto",
104+
}
105+
"""Registered pURL types.
106+
107+
Sourced from the `purl-types-index.json
108+
<https://github.com/package-url/purl-spec/blob/main/purl-types-index.json>`_
109+
registry, which is automatically regenerated whenever a new pURL type is
110+
registered.
111+
"""
112+
113+
64114
class ValidationSeverity(str, Enum):
65115
ERROR = "error"
66116
WARNING = "warning"
@@ -124,8 +174,15 @@ def normalize_type(type: AnyStr | None, encode: bool | None = True) -> str | Non
124174

125175
type_str = type if isinstance(type, str) else type.decode("utf-8")
126176
quoter = get_quoter(encode)
127-
type_str = quoter(type_str)
128-
return type_str.strip().lower() or None
177+
type_str = quoter(type_str).strip().lower()
178+
if not type_str:
179+
return None
180+
if type_str not in PURL_TYPES:
181+
raise ValueError(
182+
f"Invalid purl type: {type_str!r}. "
183+
f"Must be one of: {', '.join(sorted(PURL_TYPES))}."
184+
)
185+
return type_str
129186

130187

131188
def normalize_namespace(

src/packageurl/contrib/purl2url.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ def get_repo_download_url_by_package_type(
4444
download_url_by_type = {
4545
"github": f"https://github.com/{namespace}/{name}/archive/{version}.{archive_extension}",
4646
"bitbucket": f"https://bitbucket.org/{namespace}/{name}/get/{version}.{archive_extension}",
47-
"gitlab": f"https://gitlab.com/{namespace}/{name}/-/archive/{version}/{name}-{version}.{archive_extension}",
4847
}
4948
return download_url_by_type.get(type)
5049

@@ -158,21 +157,7 @@ def build_github_repo_url(purl):
158157
return repo_url
159158

160159

161-
@repo_router.route("pkg:gitlab/.*")
162-
def build_gitlab_repo_url(purl):
163-
"""
164-
Return a gitlab repo URL from the `purl` string.
165-
"""
166-
purl_data = PackageURL.from_string(purl)
167-
168-
namespace = purl_data.namespace
169-
name = purl_data.name
170-
171-
if name and namespace:
172-
return f"https://gitlab.com/{namespace}/{name}"
173-
174-
175-
@repo_router.route("pkg:(gem|rubygems)/.*")
160+
@repo_router.route("pkg:gem/.*")
176161
def build_rubygems_repo_url(purl):
177162
"""
178163
Return a rubygems repo URL from the `purl` string.
@@ -351,7 +336,7 @@ def build_cargo_download_url(purl):
351336
return f"https://crates.io/api/v1/crates/{name}/{version}/download"
352337

353338

354-
@download_router.route("pkg:(gem|rubygems)/.*")
339+
@download_router.route("pkg:gem/.*")
355340
def build_rubygems_download_url(purl):
356341
"""
357342
Return a rubygems download URL from the `purl` string.
@@ -435,7 +420,7 @@ def build_nuget_download_url(purl):
435420
return f"https://www.nuget.org/api/v2/package/{name}/{version}"
436421

437422

438-
@download_router.route("pkg:gitlab/.*", "pkg:bitbucket/.*", "pkg:github/.*")
423+
@download_router.route("pkg:bitbucket/.*", "pkg:github/.*")
439424
def build_repo_download_url(purl):
440425
"""
441426
Return a repository archive download URL from the `purl` string.

src/packageurl/contrib/url2purl.py

Lines changed: 0 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -372,75 +372,6 @@ def build_composer_purl(uri):
372372
register_pattern("nuget", nuget_api_pattern)
373373

374374

375-
# https://sourceforge.net/projects/turbovnc/files/3.1/turbovnc-3.1.tar.gz/download
376-
# https://sourceforge.net/projects/scribus/files/scribus/1.6.0/scribus-1.6.0.tar.gz/download
377-
# https://sourceforge.net/projects/ventoy/files/v1.0.96/Ventoy%201.0.96%20release%20source%20code.tar.gz/download
378-
# https://sourceforge.net/projects/geoserver/files/GeoServer/2.23.4/geoserver-2.23.4-war.zip/download
379-
sourceforge_download_pattern = (
380-
r"^https?://.*sourceforge.net/projects/"
381-
r"(?P<name>.+)/"
382-
r"files/"
383-
r"(?i:(?P=name)/)?" # optional case-insensitive name segment repeated
384-
r"v?(?P<version>[0-9\.]+)/" # version restricted to digits and dots
385-
r"(?i:(?P=name)).*(?P=version).*" # case-insensitive matching for {name}-{version}
386-
r"(/download)$" # ending with "/download"
387-
)
388-
389-
register_pattern("sourceforge", sourceforge_download_pattern)
390-
391-
392-
# https://sourceforge.net/projects/spacesniffer/files/spacesniffer_1_3_0_2.zip/download
393-
sourceforge_download_pattern_bis = (
394-
r"^https?://.*sourceforge.net/projects/"
395-
r"(?P<name>.+)/"
396-
r"files/"
397-
r"(?i:(?P=name))_*(?P<version>[0-9_]+).*"
398-
r"(/download)$" # ending with "/download"
399-
)
400-
401-
register_pattern("sourceforge", sourceforge_download_pattern_bis)
402-
403-
404-
@purl_router.route("https?://.*sourceforge.net/project/.*")
405-
def build_sourceforge_purl(uri):
406-
# We use a more general route pattern instead of using `sourceforge_pattern`
407-
# below by itself because we want to capture all sourceforge download URLs,
408-
# even the ones that do not fit `sourceforge_pattern`. This helps prevent
409-
# url2purl from attempting to create a generic PackageURL from a sourceforge
410-
# URL that we can't handle.
411-
412-
# http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2
413-
sourceforge_pattern = (
414-
r"^https?://.*sourceforge.net/projects?/"
415-
r"(?P<namespace>([^/]+))/" # do not allow more "/" segments
416-
r"(OldFiles/)?"
417-
r"(?P<name>.+)/"
418-
r"(?P<version>[v0-9\.]+)/" # version restricted to digits and dots
419-
r"(?P=name).*(?P=version).*" # {name}-{version} repeated in the filename
420-
r"[^/]$" # not ending with "/"
421-
)
422-
423-
sourceforge_purl = purl_from_pattern("sourceforge", sourceforge_pattern, uri)
424-
425-
if not sourceforge_purl:
426-
# Get the project name from `uri` and use that as the Package name
427-
# http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip
428-
split_uri = uri.split("/project/")
429-
430-
# http://master.dl.sourceforge.net, aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip
431-
if len(split_uri) >= 2:
432-
# aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip
433-
remaining_uri_path = split_uri[1]
434-
# aloyscore, aloyscore, 0.1a1%2520stable, 0.1a1_stable_AloysCore.zip
435-
remaining_uri_path_segments = remaining_uri_path.split("/")
436-
if remaining_uri_path_segments:
437-
project_name = remaining_uri_path_segments[0] # aloyscore
438-
sourceforge_purl = PackageURL(
439-
type="sourceforge", name=project_name, qualifiers={"download_url": uri}
440-
)
441-
return sourceforge_purl
442-
443-
444375
# https://crates.io/api/v1/crates/rand/0.7.2/download
445376
cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P<name>.+)/(?P<version>.+)(\/download)$"
446377

@@ -667,74 +598,6 @@ def build_bitbucket_purl(url):
667598
)
668599

669600

670-
@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*")
671-
def build_gitlab_purl(url):
672-
"""
673-
Return a PackageURL object from Gitlab `url`.
674-
For example:
675-
https://gitlab.com/TG1999/firebase/-/tree/1a122122/views
676-
https://gitlab.com/TG1999/firebase/-/tree
677-
https://gitlab.com/TG1999/firebase/-/master
678-
https://gitlab.com/tg1999/Firebase/-/tree/master
679-
https://gitlab.com/tg1999/Firebase/-/commit/bf04e5f289885cf2f20a92b387bcc6df33e30809
680-
"""
681-
# https://gitlab.com/<ns>/<name>/-/commit/<sha>
682-
commit_pattern = (
683-
r"https?://gitlab.com/"
684-
r"(?P<namespace>[^/]+)/(?P<name>[^/]+)/-/commit/"
685-
r"(?P<version>[0-9a-fA-F]{7,64})/?$"
686-
)
687-
688-
commit_matche = re.search(commit_pattern, url)
689-
if commit_matche:
690-
return PackageURL(
691-
type="gitlab",
692-
namespace=commit_matche.group("namespace"),
693-
name=commit_matche.group("name"),
694-
version=commit_matche.group("version"),
695-
qualifiers={},
696-
subpath="",
697-
)
698-
699-
segments = get_path_segments(url)
700-
701-
if not len(segments) >= 2:
702-
return
703-
namespace = segments[0]
704-
name = segments[1]
705-
version = None
706-
subpath = None
707-
708-
# https://gitlab.com/TG1999/firebase/master
709-
if (len(segments) >= 3) and segments[2] != "-" and segments[2] != "tree":
710-
version = segments[2]
711-
subpath = "/".join(segments[3:])
712-
713-
# https://gitlab.com/TG1999/firebase/-/tree/master
714-
if len(segments) >= 5 and (segments[2] == "-" and segments[3] == "tree"):
715-
version = segments[4]
716-
subpath = "/".join(segments[5:])
717-
718-
return PackageURL(
719-
type="gitlab",
720-
namespace=namespace,
721-
name=name,
722-
version=version,
723-
subpath=subpath,
724-
)
725-
726-
727-
# https://gitlab.com/hoppr/hoppr/-/archive/v1.11.1-dev.2/hoppr-v1.11.1-dev.2.tar.gz
728-
gitlab_archive_pattern = (
729-
r"^https?://gitlab.com/"
730-
r"(?P<namespace>.+)/(?P<name>.+)/-/archive/(?P<version>.+)/"
731-
r"(?P=name)-(?P=version).*"
732-
r"[^/]$"
733-
)
734-
735-
register_pattern("gitlab", gitlab_archive_pattern)
736-
737-
738601
# https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz
739602
hackage_download_pattern = (
740603
r"^https?://hackage.haskell.org/package/"

tests/contrib/data/url2purl.json

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -155,31 +155,6 @@
155155
"https://www.nuget.org/api/v2/package/MvvmLightLibs/4.1.23": "pkg:nuget/MvvmLightLibs@4.1.23",
156156
"https://www.nuget.org/api/v2/package/Twilio/3.4.1": "pkg:nuget/Twilio@3.4.1",
157157
"https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg": "pkg:nuget/newtonsoft.json@10.0.1",
158-
"http://master.dl.sourceforge.net/project/zznotes/zznotes/1.1.2/zznotes-1.1.2.tar.gz": "pkg:sourceforge/zznotes/zznotes@1.1.2",
159-
"http://master.dl.sourceforge.net/project/zapping/zvbi/0.2.35/zvbi-0.2.35.tar.bz2": "pkg:sourceforge/zapping/zvbi@0.2.35",
160-
"http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2": "pkg:sourceforge/libpng/zlib@1.2.3",
161-
"http://master.dl.sourceforge.net/project/xmlstar/xmlstarlet/1.0.0/xmlstarlet-1.0.0-1.src.rpm": "pkg:sourceforge/xmlstar/xmlstarlet@1.0.0",
162-
"http://master.dl.sourceforge.net/project/wxmozilla/wxMozilla/0.5.5/wxMozilla-0.5.5.exe": "pkg:sourceforge/wxmozilla/wxMozilla@0.5.5",
163-
"http://iweb.dl.sourceforge.net/project/sblim/sblim-cim-client2/2.2.5/sblim-cim-client2-2.2.5-src.zip": "pkg:sourceforge/sblim/sblim-cim-client2@2.2.5",
164-
"http://master.dl.sourceforge.net/project/zinnia/zinnia-win32/0.06/zinnia-win32-0.06.zip": "pkg:sourceforge/zinnia/zinnia-win32@0.06",
165-
"http://iweb.dl.sourceforge.net/project/findbugs/findbugs/1.3.4/findbugs-1.3.4.tar.gz/": "pkg:sourceforge/findbugs?download_url=http://iweb.dl.sourceforge.net/project/findbugs/findbugs/1.3.4/findbugs-1.3.4.tar.gz/",
166-
"http://master.dl.sourceforge.net/project/arestc/net/sf/arestc/arestc/0.1.4/arestc-0.1.4-javadoc.jar": "pkg:sourceforge/arestc?download_url=http://master.dl.sourceforge.net/project/arestc/net/sf/arestc/arestc/0.1.4/arestc-0.1.4-javadoc.jar",
167-
"http://master.dl.sourceforge.net/project/intraperson/OldFiles/intraperson/0.28/intraperson-0.28.tar.gz": "pkg:sourceforge/intraperson/intraperson@0.28",
168-
"http://master.dl.sourceforge.net/project/pwiki/pwiki/0.1.2/0.1.2.zip": "pkg:sourceforge/pwiki?download_url=http://master.dl.sourceforge.net/project/pwiki/pwiki/0.1.2/0.1.2.zip",
169-
"http://master.dl.sourceforge.net/project/iswraid/iswraid/0.1.4.3/2.4.28-pre3-iswraid.patch.gz": "pkg:sourceforge/iswraid?download_url=http://master.dl.sourceforge.net/project/iswraid/iswraid/0.1.4.3/2.4.28-pre3-iswraid.patch.gz",
170-
"http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%20stable/0.1a1_stable_AloysCore.zip": "pkg:sourceforge/aloyscore?download_url=http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip",
171-
"http://master.dl.sourceforge.net/project/myenterprise/OldFiles/1.0.0.2.MyEnterprise.Source.zip": "pkg:sourceforge/myenterprise?download_url=http://master.dl.sourceforge.net/project/myenterprise/OldFiles/1.0.0.2.MyEnterprise.Source.zip",
172-
"http://master.dl.sourceforge.net/project/wxhaskell/wxhaskell/wxhaskell-0.9/wxhaskell-src-0.9.zip": "pkg:sourceforge/wxhaskell?download_url=http://master.dl.sourceforge.net/project/wxhaskell/wxhaskell/wxhaskell-0.9/wxhaskell-src-0.9.zip",
173-
"http://master.dl.sourceforge.net/project/a2freedom/A2/1.2/a2freedom-1.2.zip": "pkg:sourceforge/a2freedom?download_url=http://master.dl.sourceforge.net/project/a2freedom/A2/1.2/a2freedom-1.2.zip",
174-
"http://master.dl.sourceforge.net/project/tinyos/OldFiles/tinyos/1.1.0/tinyos-1.1.0.tar.gz": "pkg:sourceforge/tinyos/tinyos@1.1.0",
175-
"http://master.dl.sourceforge.net/project/urlchecker/lu/ng/urlchecker/urlchecker/1.7/urlchecker-1.7-javadoc.jar": "pkg:sourceforge/urlchecker?download_url=http://master.dl.sourceforge.net/project/urlchecker/lu/ng/urlchecker/urlchecker/1.7/urlchecker-1.7-javadoc.jar",
176-
"http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar": "pkg:sourceforge/zclasspath?download_url=http://master.dl.sourceforge.net/project/zclasspath/maven2/org/zclasspath/zclasspath/1.5/zclasspath-1.5.jar",
177-
"http://master.dl.sourceforge.net/project/googleimagedown/project/v1.1/GoogleImageDownloader-v1.1-src.tar.bz2": "pkg:sourceforge/googleimagedown?download_url=http://master.dl.sourceforge.net/project/googleimagedown/project/v1.1/GoogleImageDownloader-v1.1-src.tar.bz2",
178-
"https://sourceforge.net/projects/scribus/files/scribus/1.6.0/scribus-1.6.0.tar.gz/download": "pkg:sourceforge/scribus@1.6.0",
179-
"https://sourceforge.net/projects/turbovnc/files/3.1/turbovnc-3.1.tar.gz/download": "pkg:sourceforge/turbovnc@3.1",
180-
"https://sourceforge.net/projects/ventoy/files/v1.0.96/Ventoy%201.0.96%20release%20source%20code.tar.gz/download": "pkg:sourceforge/ventoy@1.0.96",
181-
"https://sourceforge.net/projects/geoserver/files/GeoServer/2.23.4/geoserver-2.23.4-war.zip/download": "pkg:sourceforge/geoserver@2.23.4",
182-
"https://sourceforge.net/projects/spacesniffer/files/spacesniffer_1_3_0_2.zip/download": "pkg:sourceforge/spacesniffer@1_3_0_2",
183158
"https://crates.io/api/v1/crates/rand/0.7.2/download": "pkg:cargo/rand@0.7.2",
184159
"https://crates.io/api/v1/crates/clap/2.33.0/download": "pkg:cargo/clap@2.33.0",
185160
"https://crates.io/api/v1/crates/structopt/0.3.11/download": "pkg:cargo/structopt@0.3.11",
@@ -253,17 +228,6 @@
253228
"https://bitbucket.org/multicoreware/x265/downloads/x265_2.6.tar.gz": "pkg:bitbucket/multicoreware/x265?download_url=https://bitbucket.org/multicoreware/x265/downloads/x265_2.6.tar.gz",
254229
"https://bitbucket.org/robeden/trove/downloads/trove-3.0.3.zip": "pkg:bitbucket/robeden/trove?download_url=https://bitbucket.org/robeden/trove/downloads/trove-3.0.3.zip",
255230
"https://bitbucket.org/efotinis/deskpins/downloads/DeskPins-1.31-setup.exe": "pkg:bitbucket/efotinis/deskpins?download_url=https://bitbucket.org/efotinis/deskpins/downloads/DeskPins-1.31-setup.exe",
256-
"https://gitlab.com/TG1999/firebase/-/tree/1a122122/views": "pkg:gitlab/tg1999/firebase@1a122122#views",
257-
"https://gitlab.com/tg1999/firebase": "pkg:gitlab/tg1999/firebase",
258-
"https://gitlab.com/TG1999/firebase/-/": "pkg:gitlab/tg1999/firebase",
259-
"https://gitlab.com/TG1999/firebase/-/tree": "pkg:gitlab/tg1999/firebase",
260-
"https://gitlab.com/TG1999/firebase/-/master": "pkg:gitlab/tg1999/firebase",
261-
"https://gitlab.com/TG1999/firebase/tree/": "pkg:gitlab/tg1999/firebase",
262-
"https://gitlab.com/TG1999/firebase/master": "pkg:gitlab/tg1999/firebase@master",
263-
"https://gitlab.com/TG1999/firebase/-/tree/master": "pkg:gitlab/tg1999/firebase@master",
264-
"https://gitlab.com/tg1999/Firebase/-/tree/master": "pkg:gitlab/tg1999/firebase@master",
265-
"https://gitlab.com/TG1999/FIREBASE": "pkg:gitlab/tg1999/firebase",
266-
"https://gitlab.com/hoppr/hoppr/-/archive/v1.11.1-dev.2/hoppr-v1.11.1-dev.2.tar.gz": "pkg:gitlab/hoppr/hoppr@v1.11.1-dev.2",
267231
"https://hackage.haskell.org/package/a50-0.5/a50-0.5.tar.gz": "pkg:hackage/a50@0.5",
268232
"https://hackage.haskell.org/package/AC-HalfInteger-1.2.1/AC-HalfInteger-1.2.1.tar.gz": "pkg:hackage/AC-HalfInteger@1.2.1",
269233
"https://hackage.haskell.org/package/3d-graphics-examples-0.0.0.2/3d-graphics-examples-0.0.0.2.tar.gz": "pkg:hackage/3d-graphics-examples@0.0.0.2",
@@ -276,6 +240,5 @@
276240
"https://cran.r-project.org/src/contrib/jsonlite_1.8.8.tar.gz": "pkg:cran/jsonlite@1.8.8",
277241
"https://packagemanager.rstudio.com/cran/2022-06-23/src/contrib/curl_4.3.2.tar.gz": "pkg:cran/curl@4.3.2?download_url=https://packagemanager.rstudio.com/cran/2022-06-23/src/contrib/curl_4.3.2.tar.gz",
278242
"https://github.com/TG1999/first_repo/commit/98e516011d6e096e25247b82fc5f196bbeecff10": "pkg:github/tg1999/first_repo@98e516011d6e096e25247b82fc5f196bbeecff10",
279-
"https://gitlab.com/TG1999/first_repo/-/commit/bf04e5f289885cf2f20a92b387bcc6df33e30809": "pkg:gitlab/tg1999/first_repo@bf04e5f289885cf2f20a92b387bcc6df33e30809",
280243
"https://bitbucket.org/TG1999/first_repo/commits/16a60c4a74ef477cd8c16ca82442eaab2fbe8c86": "pkg:bitbucket/tg1999/first_repo@16a60c4a74ef477cd8c16ca82442eaab2fbe8c86"
281244
}

0 commit comments

Comments
 (0)