Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
from vulnerabilities.pipelines import remove_duplicate_advisories
from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees
from vulnerabilities.pipelines.v2_improvers import compute_advisory_content_hash
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
from vulnerabilities.pipelines.v2_improvers import (
Expand Down Expand Up @@ -74,5 +75,6 @@
compute_advisory_todo.ComputeToDo,
collect_ssvc_trees.CollectSSVCPipeline,
relate_severities.RelateSeveritiesPipeline,
compute_advisory_content_hash.ComputeAdvisoryContentHash,
]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.2.11 on 2026-03-11 08:46

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds AdvisoryV2.advisory_content_hash: a SHA-256 hex digest (64 chars)
    # of the advisory's normalized content, used to spot advisories that
    # carry identical content. Nullable so existing rows need no backfill
    # at migration time; a pipeline populates it afterwards.

    dependencies = [
        ("vulnerabilities", "0115_impactedpackageaffecting_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="advisoryv2",
            name="advisory_content_hash",
            field=models.CharField(
                blank=True,
                help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
                max_length=64,
                null=True,
            ),
        ),
    ]
36 changes: 7 additions & 29 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3010,6 +3010,13 @@ class AdvisoryV2(models.Model):
help_text="Related advisories that are used to calculate the severity of this advisory.",
)

advisory_content_hash = models.CharField(
max_length=64,
blank=True,
null=True,
help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.",
)

@property
def risk_score(self):
"""
Expand Down Expand Up @@ -3078,35 +3085,6 @@ def get_aliases(self):
"""
return self.aliases.all()

def compute_advisory_content(self):
    """
    Return a SHA-256 hex digest that uniquely identifies this advisory's content.

    The summary, impacted packages, patches, severities and weaknesses are
    normalized into a canonical JSON document (sorted keys, compact
    separators, deterministically ordered lists) which is then hashed, so
    two advisories with the same content always produce the same digest.
    """

    def by_canonical_json(entries):
        # Deterministic ordering for lists of dicts: sort by their
        # canonical JSON serialization.
        return sorted(entries, key=lambda entry: json.dumps(entry, sort_keys=True))

    impacted = by_canonical_json(
        impact.to_dict() for impact in self.impacted_packages.all()
    )
    patch_data = by_canonical_json(
        patch.to_patch_data().to_dict() for patch in self.patches.all()
    )
    severity_data = sorted(
        (sev.to_vulnerability_severity_data().to_dict() for sev in self.severities.all()),
        key=lambda entry: (entry.get("system"), entry.get("value")),
    )

    normalized = {
        "summary": normalize_text(self.summary),
        "impacted_packages": impacted,
        "patches": patch_data,
        "severities": severity_data,
        "weaknesses": normalize_list([weakness.cwe_id for weakness in self.weaknesses.all()]),
    }

    canonical = json.dumps(normalized, separators=(",", ":"), sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

alias = get_aliases


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ def load_advisories(

fixed_version_range = None
try:
fixed_version_range = AlpineLinuxVersionRange.from_versions([version])
if version:
fixed_version_range = AlpineLinuxVersionRange.from_versions([version])
except InvalidVersion as e:
logger(
f"{version!r} is not a valid AlpineVersion {e!r}",
Expand Down
15 changes: 8 additions & 7 deletions vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,19 +330,20 @@ def to_version_ranges(self, versions_data, fixed_versions):
"=": "=",
}
comparator = comparator_by_range_expression.get(range_expression)
if comparator:
if comparator and version_value and version_value not in self.ignorable_versions:
constraints.append(
VersionConstraint(comparator=comparator, version=SemverVersion(version_value))
)

for fixed_version in fixed_versions:
# The VersionConstraint method `invert()` inverts the fixed_version's comparator,
# enabling inclusion of multiple fixed versions with the `affected_version_range` values.
constraints.append(
VersionConstraint(
comparator="=",
version=SemverVersion(fixed_version),
).invert()
)
if fixed_version and fixed_version not in self.ignorable_versions:
constraints.append(
VersionConstraint(
comparator="=",
version=SemverVersion(fixed_version),
).invert()
)

return ApacheVersionRange(constraints=constraints)
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
spdx_license_expression = "CC0-1.0"
license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
run_once = True

precedence = 200

Expand Down
4 changes: 3 additions & 1 deletion vulnerabilities/pipelines/v2_importers/gitlab_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def parse_gitlab_advisory(
original_advisory_text=json.dumps(gitlab_advisory, indent=2, ensure_ascii=False),
)
affected_version_range = None
fixed_version_range = None
fixed_versions = gitlab_advisory.get("fixed_versions") or []
affected_range = gitlab_advisory.get("affected_range")
gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
Expand Down Expand Up @@ -285,7 +286,8 @@ def parse_gitlab_advisory(
if affected_version_range:
vrc = affected_version_range.__class__

fixed_version_range = vrc.from_versions(parsed_fixed_versions)
if parsed_fixed_versions:
fixed_version_range = vrc.from_versions(parsed_fixed_versions)
if not fixed_version_range and not affected_version_range:
return

Expand Down
4 changes: 3 additions & 1 deletion vulnerabilities/pipelines/v2_importers/ruby_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def get_affected_packages(record, purl):
affected_packages = []
for unaffected_version in record.get("unaffected_versions", []):
try:
affected_version_range = GemVersionRange.from_native(unaffected_version).invert()
if unaffected_version:
unaffected_version = unaffected_version.strip()
affected_version_range = GemVersionRange.from_native(unaffected_version).invert()
validate_comparators(affected_version_range.constraints)
affected_packages.append(
AffectedPackageV2(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#


from aboutcode.pipeline import LoopProgress

from vulnerabilities.models import AdvisoryV2
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.utils import compute_advisory_content


class ComputeAdvisoryContentHash(VulnerableCodePipeline):
    """Populate ``advisory_content_hash`` for advisories that are missing it."""

    pipeline_id = "compute_advisory_content_hash_v2"

    @classmethod
    def steps(cls):
        return (cls.compute_advisory_content_hash,)

    def compute_advisory_content_hash(self):
        """Compute and persist a content hash for every advisory lacking one."""
        batch_size = 5000
        missing_hash = AdvisoryV2.objects.filter(advisory_content_hash__isnull=True)

        progress = LoopProgress(
            total_iterations=missing_hash.count(),
            logger=self.log,
            progress_step=1,
        )

        pending = []
        # Stream rows in chunks to bound memory, and flush updates in bulk.
        for advisory in progress.iter(missing_hash.iterator(chunk_size=batch_size)):
            advisory.advisory_content_hash = compute_advisory_content(advisory)
            pending.append(advisory)
            if len(pending) >= batch_size:
                self._flush(pending, batch_size)

        if pending:
            self._flush(pending, batch_size)

        self.log("Finished computing advisory_content_hash")

    @staticmethod
    def _flush(pending, batch_size):
        # Persist the accumulated hashes and empty the buffer in place.
        AdvisoryV2.objects.bulk_update(
            pending,
            ["advisory_content_hash"],
            batch_size=batch_size,
        )
        pending.clear()
3 changes: 3 additions & 0 deletions vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from vulnerabilities.models import VulnerabilitySeverity
from vulnerabilities.models import Weakness
from vulnerabilities.pipes.univers_utils import get_exact_purls_v2
from vulnerabilities.utils import compute_advisory_content


def get_or_create_aliases(aliases: List) -> QuerySet:
Expand Down Expand Up @@ -301,6 +302,7 @@ def insert_advisory_v2(
advisory_obj = None
created = False
content_id = compute_content_id_v2(advisory_data=advisory)
advisory_content_hash = compute_advisory_content(advisory_data=advisory)
try:
default_data = {
"datasource_id": pipeline_id,
Expand All @@ -311,6 +313,7 @@ def insert_advisory_v2(
"original_advisory_text": advisory.original_advisory_text,
"url": advisory.url,
"precedence": precedence,
"advisory_content_hash": advisory_content_hash,
}

advisory_obj, created = AdvisoryV2.objects.get_or_create(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from unittest.mock import patch

import pytest

from vulnerabilities.models import AdvisoryV2
from vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash import (
ComputeAdvisoryContentHash,
)

pytestmark = pytest.mark.django_db


@pytest.fixture
def advisory_factory():
    """Return a callable that bulk-creates ``count`` AdvisoryV2 rows."""

    def _create(count, with_hash=False, start=0):
        # When with_hash is set, rows get a sentinel hash so tests can
        # verify the pipeline leaves already-hashed advisories alone.
        hash_value = "existing_hash" if with_hash else None
        advisories = [
            AdvisoryV2(
                summary=f"summary {index}",
                advisory_content_hash=hash_value,
                unique_content_id=f"unique_id_{index}",
                advisory_id=f"ADV-{index}",
                datasource_id="ds",
                avid=f"ds/ADV-{index}",
                url=f"https://example.com/ADV-{index}",
            )
            for index in range(start, start + count)
        ]
        return AdvisoryV2.objects.bulk_create(advisories)

    return _create


def run_pipeline():
    # Invoke the hash-computation step directly, bypassing the full
    # pipeline runner machinery.
    ComputeAdvisoryContentHash().compute_advisory_content_hash()


@patch(
    "vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash.compute_advisory_content"
)
def test_pipeline_updates_only_missing_hash(mock_compute, advisory_factory):
    """Only advisories without a hash are recomputed; existing hashes survive."""
    advisory_factory(3, with_hash=False, start=0)
    advisory_factory(2, with_hash=True, start=100)
    mock_compute.return_value = "new_hash"

    run_pipeline()

    assert AdvisoryV2.objects.filter(advisory_content_hash="new_hash").count() == 3
    assert AdvisoryV2.objects.filter(advisory_content_hash="existing_hash").count() == 2
    assert mock_compute.call_count == 3


@patch(
    "vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash.compute_advisory_content"
)
def test_pipeline_bulk_update_batches(mock_compute, advisory_factory):
    """More advisories than one batch (5000) are still all updated."""
    total = 6000
    advisory_factory(total, with_hash=False)
    mock_compute.return_value = "batch_hash"

    run_pipeline()

    hashed = AdvisoryV2.objects.filter(advisory_content_hash="batch_hash").count()
    assert hashed == total
    assert mock_compute.call_count == total


@patch(
    "vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash.compute_advisory_content"
)
def test_pipeline_no_advisories(mock_compute):
    """With an empty database the pipeline runs cleanly and hashes nothing."""
    run_pipeline()

    assert mock_compute.call_count == 0
8 changes: 4 additions & 4 deletions vulnerabilities/tests/test_api_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ def setUp(self):

def test_list_with_purl_filter(self):
url = reverse("package-v3-list")
with self.assertNumQueries(29):
with self.assertNumQueries(31):
response = self.client.get(url, {"purl": "pkg:pypi/sample@1.0.0"})
assert response.status_code == 200
assert "packages" in response.data["results"]
Expand All @@ -868,7 +868,7 @@ def test_list_with_purl_filter(self):

def test_bulk_lookup(self):
url = reverse("package-v3-bulk-lookup")
with self.assertNumQueries(28):
with self.assertNumQueries(30):
response = self.client.post(url, {"purls": ["pkg:pypi/sample@1.0.0"]}, format="json")
assert response.status_code == 200
assert "packages" in response.data
Expand All @@ -878,7 +878,7 @@ def test_bulk_lookup(self):
def test_bulk_search_plain(self):
url = reverse("package-v3-bulk-search")
payload = {"purls": ["pkg:pypi/sample@1.0.0"], "plain_purl": True, "purl_only": False}
with self.assertNumQueries(28):
with self.assertNumQueries(30):
response = self.client.post(url, payload, format="json")
assert response.status_code == 200
assert "packages" in response.data
Expand All @@ -894,7 +894,7 @@ def test_bulk_search_purl_only(self):

def test_lookup_single_package(self):
url = reverse("package-v3-lookup")
with self.assertNumQueries(21):
with self.assertNumQueries(23):
response = self.client.post(url, {"purl": "pkg:pypi/sample@1.0.0"}, format="json")
assert response.status_code == 200
assert any(pkg["purl"] == "pkg:pypi/sample@1.0.0" for pkg in response.data)
Expand Down
Loading
Loading