Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/packagedcode/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,48 @@ def update_workspace_members(cls, workspace_members, codebase):
for member in workspace_members:
member.save(codebase)

class NpmrcHandler(BaseNpmHandler):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be part of the npm assembly only if the data present in .npmrc files can be used to update package data information in other npm manifests. The registry information present could be used to update npm URLs in npm packages found alongside these manifests, but these URLs are not always usable. And you'd have to modify the BaseNpmHandler.assemble function to handle/use these too.

So let's keep this as a subclass of models.NonAssemblableDatafileHandler to avoid any assembly from these files and keep things simple for now.

datasource_id = 'npmrc'
path_patterns = ('*/.npmrc',)
default_package_type = 'npm'
default_primary_language = None
description = 'npm .npmrc configuration file'
documentation_url = 'https://docs.npmjs.com/cli/v11/configuring-npm/npmrc'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
documentation_url = 'https://docs.npmjs.com/cli/v11/configuring-npm/npmrc'
documentation_url = 'https://docs.npmjs.com/cli/configuring-npm/npmrc'

this would pin the link to this version and will soon be outdated. I see some other npm doc URLs also have versions, can you also remove those in the PR?


@classmethod
def parse(cls, location, package_only=False):
"""
parse [.npmrc] file and store result in key : value pair.
convert key : value pair to object and return it.
"""
extra_data = {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are .npmrc examples with license/author info which we need to parse and store properly; this is not just extra data.

Please also research examples found in the wild/docs to see what other fields we can use like this to map useful data to package data fields.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll update the handler to map meaningful fields (license, author, homepage, etc.) to ScanCode’s structured package fields.

Other config-only fields (like proxy, cafile, always-auth) will be kept in extra_data, since they don’t map to package metadata.

with io.open(location, encoding='utf-8') as lines:
for line in lines:
line = line.strip()
if not line or line.startswith(';') or line.startswith('#'):
continue
if '=' not in line:
continue
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
# ignore empty key but allow empty values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there cases with empty values that are still useful to keep? Can you provide examples?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing it out.
I did not even think about it at the time.
I'm not able to find any empty value (in a key-value pair) that could be useful.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not yet seen any case with an empty value. Should I include it in the handler or just skip it (example: registry=)?

if not key:
continue
# if value is in single quote or in double quote, strip them
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
if len(value) >= 2:
value = value[1:-1]
extra_data[key] = value

package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
primary_language=cls.default_primary_language,
description=cls.description,
extra_data=extra_data,
)
yield models.PackageData.from_data(package_data, package_only)

def get_urls(namespace, name, version, **kwargs):
return dict(
Expand Down
8 changes: 8 additions & 0 deletions tests/packagedcode/data/npm/basic/.npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; sample .npmrc for tests
# a comment line
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use real .npmrc files seen in the wild, such as those found with: https://github.com/search?q=path%3A*.npmrc&type=code

This also helps you look for what kind of data is present and whether we can use it differently rather than just storing it in the extra_data.

registry=https://registry.npmjs.org/
cache=~/.npm
strict-ssl=true
//registry.npmjs.org/:_authToken="abc123"
init.author.name=John Doe
emptykey=
53 changes: 53 additions & 0 deletions tests/packagedcode/data/npm/basic/.npmrc.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
[
{
"type": "npm",
"namespace": null,
"name": null,
"version": null,
"qualifiers": {},
"subpath": null,
"primary_language": null,
"description": "npm .npmrc configuration file",
"release_date": null,
"parties": [],
"keywords": [],
"homepage_url": null,
"download_url": null,
"size": null,
"sha1": null,
"md5": null,
"sha256": null,
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": null,
"copyright": null,
"holder": null,
"declared_license_expression": null,
"declared_license_expression_spdx": null,
"license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
"extracted_license_statement": null,
"notice_text": null,
"source_packages": [],
"file_references": [],
"is_private": false,
"is_virtual": false,
"extra_data": {
"registry": "https://registry.npmjs.org/",
"cache": "~/.npm",
"strict-ssl": "true",
"//registry.npmjs.org/:_authToken": "abc123",
"init.author.name": "John Doe",
"emptykey": ""
},
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": null,
"datasource_id": "npmrc",
"purl": null
}
]
15 changes: 15 additions & 0 deletions tests/packagedcode/test_npmrc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import os
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to create a new file; add the tests to the test_npm.py file.


from packagedcode import npm
from packages_test_utils import PackageTester
from scancode_config import REGEN_TEST_FIXTURES


class TestNpmrc(PackageTester):
test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

def test_parse_basic_npmrc(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also add a test to check that npm.NpmrcHandler.is_datafile() works as expected.

test_file = self.get_test_loc('npm/basic/.npmrc')
expected_loc = self.get_test_loc('npm/basic/.npmrc.expected')
packages_data = npm.NpmrcHandler.parse(test_file)
self.check_packages_data(packages_data, expected_loc, regen=REGEN_TEST_FIXTURES)