Skip to content

Commit e5ddeeb

Browse files
committed
Support custom Artifactory repositories for package metadata
When using --index-url with a custom Artifactory repository, dependency resolution works but the packages array comes back empty. This happens because get_pypi_data_from_purl() hardcodes https://pypi.org/pypi for the JSON API endpoint. Internal packages that don't exist on PyPI.org return 404 and are silently skipped.

Changes:
- Try each repo's JSON API endpoint (converting /simple to /pypi) before falling back to PyPI.org
- Match distribution files by filename instead of full URL, since paths can differ between Simple API and JSON API endpoints

Signed-off-by: Kai Hodžić <hodzic.e.k@outlook.com>
1 parent b9df6f9 commit e5ddeeb

4 files changed

Lines changed: 247 additions & 7 deletions

File tree

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ v0.15.0
77
- Drop support for python3.9 and add support for python3.14
88
- Ensure that cached file is not empty before use https://github.com/aboutcode-org/python-inspector/pull/251
99
- Filter out empty values from install_requires https://github.com/aboutcode-org/python-inspector/pull/250
10+
- Support custom Artifactory repositories for package metadata by trying each
11+
repo's JSON API endpoint before falling back to PyPI.org
1012

1113
v0.14.4
1214
-----------

src/python_inspector/package_data.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
1111

12+
import posixpath
1213
from typing import Dict
1314
from typing import List
1415
from typing import Optional
16+
from urllib.parse import urlparse
1517

1618
from packageurl import PackageURL
1719

@@ -43,12 +45,26 @@ async def get_pypi_data_from_purl(
4345
version = parsed_purl.version
4446
if not version:
4547
raise Exception("Version is not specified in the purl")
46-
base_path = "https://pypi.org/pypi"
47-
api_url = f"{base_path}/{name}/{version}/json"
48+
49+
# Build list of JSON API URLs to try: each repo's /pypi endpoint, then PyPI.org as fallback.
50+
# For Artifactory, the /simple endpoint has a corresponding /pypi JSON API endpoint.
51+
api_urls = []
52+
for repo in repos:
53+
base_path = repo.index_url.replace("/simple", "/pypi")
54+
api_urls.append(f"{base_path}/{name}/{version}/json")
55+
api_urls.append(f"https://pypi.org/pypi/{name}/{version}/json")
4856

4957
from python_inspector.utils import get_response_async
5058

51-
response = await get_response_async(api_url)
59+
# Try each API URL until one succeeds
60+
response = None
61+
api_url = None
62+
for url in api_urls:
63+
response = await get_response_async(url)
64+
if response:
65+
api_url = url
66+
break
67+
5268
if not response:
5369
return None
5470

@@ -83,14 +99,22 @@ async def get_pypi_data_from_purl(
8399
if wheel_url:
84100
valid_distribution_urls.insert(0, wheel_url)
85101

86-
urls = {url.get("url"): url for url in response.get("urls") or []}
102+
# Index by filename for matching since distribution URLs from /simple may have
103+
# different paths than URLs from /pypi JSON API (especially with Artifactory)
104+
urls_by_filename = {}
105+
for url_entry in response.get("urls") or []:
106+
entry_url = url_entry.get("url")
107+
if entry_url:
108+
filename = posixpath.basename(urlparse(entry_url).path)
109+
urls_by_filename[filename] = url_entry
110+
87111
# iterate over the valid distribution urls and return the first
88112
# one that is matching.
89113
for dist_url in valid_distribution_urls:
90-
if dist_url not in urls:
114+
filename = posixpath.basename(urlparse(dist_url).path)
115+
url_data = urls_by_filename.get(filename)
116+
if not url_data:
91117
continue
92-
93-
url_data = urls.get(dist_url)
94118
digests = url_data.get("digests") or {}
95119

96120
return PackageData(
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"type": "pypi",
3+
"namespace": null,
4+
"name": "requests",
5+
"version": "2.28.0",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": "Python",
9+
"description": "",
10+
"release_date": "2022-06-29T15:30:00",
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://requests.readthedocs.io",
14+
"download_url": "https://repo.example.com/simple/../packages/ab/cd/requests-2.28.0-py3-none-any.whl",
15+
"size": 62500,
16+
"sha1": null,
17+
"md5": "789xyz",
18+
"sha256": "abc123def456",
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "Apache-2.0",
25+
"declared_license": {},
26+
"notice_text": null,
27+
"source_packages": [],
28+
"file_references": [],
29+
"extra_data": {},
30+
"dependencies": [],
31+
"repository_homepage_url": null,
32+
"repository_download_url": null,
33+
"api_data_url": "https://repo.example.com/pypi/requests/2.28.0/json",
34+
"datasource_id": null,
35+
"purl": "pkg:pypi/requests@2.28.0"
36+
}

tests/test_package_data.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# ScanCode is a trademark of nexB Inc.
6+
# SPDX-License-Identifier: Apache-2.0
7+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
8+
# See https://github.com/aboutcode-org/python-inspector for support or download.
9+
# See https://aboutcode.org for more information about nexB OSS projects.
10+
#
11+
12+
import os
13+
from unittest import mock
14+
15+
import pytest
16+
from commoncode.testcase import FileDrivenTesting
17+
from test_cli import check_data_results
18+
19+
from python_inspector.package_data import get_pypi_data_from_purl
20+
from python_inspector.utils_pypi import Environment
21+
from python_inspector.utils_pypi import PypiSimpleRepository
22+
23+
test_env = FileDrivenTesting()
24+
test_env.test_data_dir = os.path.join(os.path.dirname(__file__), "data")
25+
26+
27+
@pytest.mark.asyncio
28+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
29+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
30+
@mock.patch("python_inspector.utils.get_response_async")
31+
async def test_get_pypi_data_from_purl_tries_repos_in_order(
32+
mock_get_response, mock_get_wheels, mock_get_sdist
33+
):
34+
mock_get_sdist.return_value = None
35+
mock_get_wheels.return_value = []
36+
37+
call_urls = []
38+
39+
async def track_calls(url):
40+
call_urls.append(url)
41+
return None
42+
43+
mock_get_response.side_effect = track_calls
44+
45+
repo1 = PypiSimpleRepository(index_url="https://repo1.example.com/simple")
46+
repo2 = PypiSimpleRepository(index_url="https://repo2.example.com/simple")
47+
env = Environment(python_version="310", operating_system="linux")
48+
49+
await get_pypi_data_from_purl(
50+
purl="pkg:pypi/requests@2.28.0",
51+
environment=env,
52+
repos=[repo1, repo2],
53+
prefer_source=False,
54+
)
55+
56+
assert call_urls == [
57+
"https://repo1.example.com/pypi/requests/2.28.0/json",
58+
"https://repo2.example.com/pypi/requests/2.28.0/json",
59+
"https://pypi.org/pypi/requests/2.28.0/json",
60+
]
61+
62+
63+
@pytest.mark.asyncio
64+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
65+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
66+
@mock.patch("python_inspector.utils.get_response_async")
67+
async def test_get_pypi_data_from_purl_stops_on_first_success(
68+
mock_get_response, mock_get_wheels, mock_get_sdist
69+
):
70+
mock_get_sdist.return_value = None
71+
mock_get_wheels.return_value = []
72+
73+
call_urls = []
74+
75+
async def return_success_on_second(url):
76+
call_urls.append(url)
77+
if "repo2" in url:
78+
return {"info": {}, "urls": []}
79+
return None
80+
81+
mock_get_response.side_effect = return_success_on_second
82+
83+
repo1 = PypiSimpleRepository(index_url="https://repo1.example.com/simple")
84+
repo2 = PypiSimpleRepository(index_url="https://repo2.example.com/simple")
85+
env = Environment(python_version="310", operating_system="linux")
86+
87+
await get_pypi_data_from_purl(
88+
purl="pkg:pypi/requests@2.28.0",
89+
environment=env,
90+
repos=[repo1, repo2],
91+
prefer_source=False,
92+
)
93+
94+
assert call_urls == [
95+
"https://repo1.example.com/pypi/requests/2.28.0/json",
96+
"https://repo2.example.com/pypi/requests/2.28.0/json",
97+
]
98+
99+
100+
@pytest.mark.asyncio
101+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
102+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
103+
@mock.patch("python_inspector.utils.get_response_async")
104+
async def test_get_pypi_data_from_purl_falls_back_to_pypi_org(
105+
mock_get_response, mock_get_wheels, mock_get_sdist
106+
):
107+
mock_get_sdist.return_value = None
108+
mock_get_wheels.return_value = []
109+
110+
call_urls = []
111+
112+
async def track_calls(url):
113+
call_urls.append(url)
114+
return None
115+
116+
mock_get_response.side_effect = track_calls
117+
118+
env = Environment(python_version="310", operating_system="linux")
119+
120+
await get_pypi_data_from_purl(
121+
purl="pkg:pypi/requests@2.28.0",
122+
environment=env,
123+
repos=[],
124+
prefer_source=False,
125+
)
126+
127+
assert call_urls == ["https://pypi.org/pypi/requests/2.28.0/json"]
128+
129+
130+
@pytest.mark.asyncio
131+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
132+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
133+
@mock.patch("python_inspector.utils.get_response_async")
134+
async def test_get_pypi_data_from_purl_matches_by_filename(
135+
mock_get_response, mock_get_wheels, mock_get_sdist
136+
):
137+
mock_get_sdist.return_value = None
138+
mock_get_wheels.return_value = [
139+
"https://repo.example.com/simple/../packages/ab/cd/requests-2.28.0-py3-none-any.whl"
140+
]
141+
142+
async def return_json_response(url):
143+
if "pypi" in url:
144+
return {
145+
"info": {
146+
"name": "requests",
147+
"version": "2.28.0",
148+
"home_page": "https://requests.readthedocs.io",
149+
"license_expression": "Apache-2.0",
150+
},
151+
"urls": [
152+
{
153+
"url": "../../packages/xy/zz/requests-2.28.0-py3-none-any.whl",
154+
"digests": {"sha256": "abc123def456", "md5": "789xyz"},
155+
"size": 62500,
156+
"upload_time": "2022-06-29T15:30:00",
157+
}
158+
],
159+
}
160+
return None
161+
162+
mock_get_response.side_effect = return_json_response
163+
164+
repo = PypiSimpleRepository(index_url="https://repo.example.com/simple")
165+
env = Environment(python_version="310", operating_system="linux")
166+
167+
result = await get_pypi_data_from_purl(
168+
purl="pkg:pypi/requests@2.28.0",
169+
environment=env,
170+
repos=[repo],
171+
prefer_source=False,
172+
)
173+
174+
expected_file = test_env.get_test_loc(
175+
"test_get_pypi_data_from_purl_matches_by_filename-expected.json",
176+
must_exist=False,
177+
)
178+
check_data_results(results=result.to_dict(), expected_file=expected_file)

0 commit comments

Comments
 (0)