Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ select = [
known-first-party = [
'generate_osv_advisories',
'user_agent',
'typings'
'typings',
'utils'
]
11 changes: 5 additions & 6 deletions scripts/download_sa_advisories.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

import json
import os
import typing

import requests

import utils
from typings import drupal
from user_agent import user_agent

Expand All @@ -26,11 +26,10 @@ def get_most_recent_changed_timestamp() -> int:
for file in os.scandir('cache/advisories'):
if not file.is_file() or not file.name.endswith('.json'):
continue
with open(file.path) as f:
advisory = typing.cast(drupal.Advisory, json.load(f))
changed = int(advisory['changed'])
if changed > most_recent_changed or most_recent_changed == 0:
most_recent_changed = changed
advisory = utils.load_sa_advisory(file.path)
changed = int(advisory['changed'])
if changed > most_recent_changed or most_recent_changed == 0:
most_recent_changed = changed
except FileNotFoundError:
pass
return most_recent_changed
Expand Down
7 changes: 5 additions & 2 deletions scripts/generate_osv_advisories.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import semver
from markdownify import markdownify

import utils
from typings import drupal, osv
from user_agent import user_agent

Expand Down Expand Up @@ -318,6 +319,9 @@ def get_credits_from_sa(


def determine_composer_package_name(sa_advisory: drupal.Advisory) -> str:
if sa_advisory['field_project'] is None:
raise Exception('advisory does not have a project!')

project = typing.cast(
drupal.Project, fetch_drupal_node(sa_advisory['field_project']['id'])
)
Expand Down Expand Up @@ -425,8 +429,7 @@ def generate_osv_advisories() -> None:
if not file.is_file() or not file.name.endswith('.json'):
continue

with open(file.path) as f:
sa_advisory: drupal.Advisory = json.load(f)
sa_advisory = utils.load_sa_advisory(file.path)
print(f'processing {sa_advisory["url"]}')
sa_id = file.name.removesuffix('.json')
osv_advisory = build_osv_advisory(sa_id, sa_advisory)
Expand Down
8 changes: 5 additions & 3 deletions scripts/precache_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import requests

import utils
from typings import drupal
from user_agent import user_agent

Expand Down Expand Up @@ -49,9 +50,10 @@ def fetch_and_cache_drupal_nodes() -> None:
if not file.is_file() or not file.name.endswith('.json'):
continue

with open(file.path) as f:
sa_advisory: drupal.Advisory = json.load(f)
ids.add(sa_advisory['field_project']['id'])
sa_advisory = utils.load_sa_advisory(file.path)

if sa_advisory['field_project'] is not None:
ids.add(sa_advisory['field_project']['id'])

for i, batch in enumerate(batched(ids, 50, strict=False)):
print(f'fetching {len(batch)} nodes ({len(ids) - i * 50 - len(batch)} remaining)')
Expand Down
29 changes: 25 additions & 4 deletions scripts/typings/drupal.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,42 @@ class Node(typing.TypedDict):
type: str


class Advisory(Node):
class AdvisoryBase(Node):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: I've had to split this into three types because Python doesn't allow you to change the type of a field when inheriting, regardless of the variance of the type

i.e. ideally we'd do something like

class AdvisoryRaw(Node):
  field_sa_description: RichTextField | list[None]

class Advisory(AdvisoryRaw):
  field_sa_description: RichTextField

but that is not supported

field_is_psa: typing.Literal['0', '1']
field_affected_versions: str | None
field_project: EntityReferenceField
field_fixed_in: list[EntityReferenceField]
field_sa_reported_by: RichTextField | list[typing.Never]
field_sa_criticality: str
field_sa_cve: list[str]
field_sa_description: RichTextField
created: str
changed: str
title: str
url: str


class Advisory(AdvisoryBase):
"""
Represents an advisory sourced from the Drupal JSON API that has been
transformed to make it easier to work with
"""

field_project: EntityReferenceField | None
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: I've included this field as it's technically possible, even though I think it's required on the upstream's end

field_sa_reported_by: RichTextField
field_sa_description: RichTextField


class AdvisoryRaw(AdvisoryBase):
    """
    Represents an advisory provided by the Drupal JSON API without any post-processing.

    This mainly means that object fields which don't have a value in the database
    will be represented by an empty list due to how associative arrays in PHP work
    """

    # each of these is either the populated object or, when the field has no
    # value, a list that can never hold an element (i.e. an empty list)
    field_project: EntityReferenceField | list[typing.Never]
    field_sa_reported_by: RichTextField | list[typing.Never]
    field_sa_description: RichTextField | list[typing.Never]


class Project(Node):
# type will be project_module, project_theme, or project_core
field_project_machine_name: str
Expand Down
32 changes: 32 additions & 0 deletions scripts/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import json

from typings import drupal


def load_sa_advisory(file_path: str) -> drupal.Advisory:
    """
    Loads a Drupal advisory from a json file stored on disk, making some adjustments
    in the process to make it easier to work with.

    Object fields that have no value are stored in the raw advisory as an empty
    list rather than an object; those are normalised as follows:

    - ``field_project`` becomes ``None``
    - ``field_sa_reported_by`` and ``field_sa_description`` become an empty
      rich text value (``{'format': '1', 'value': ''}``)

    All other fields are copied through unchanged.

    :param file_path: path of the json file holding the raw advisory
    :return: the advisory with the three fields above normalised
    :raises OSError: if the file cannot be opened (e.g. ``FileNotFoundError``)
    :raises json.JSONDecodeError: if the file does not contain valid json
    :raises KeyError: if the raw advisory lacks one of the three normalised fields
    """
    # json is utf-8 encoded, so don't rely on the platform's default encoding
    with open(file_path, encoding='utf-8') as f:
        raw_advisory: drupal.AdvisoryRaw = json.load(f)

    project = raw_advisory['field_project']
    reported_by = raw_advisory['field_sa_reported_by']
    description = raw_advisory['field_sa_description']

    # noinspection PyTypeChecker
    # https://youtrack.jetbrains.com/issue/PY-58714/False-positive-TypedDict-has-missing-key-when-using-unpacking
    sa_advisory: drupal.Advisory = {
        **raw_advisory,
        # this represents a relationship with another entity whose ID we will
        # sometimes look up, so there is deliberately no fallback value for it
        'field_project': project if isinstance(project, dict) else None,
        # for the rich text fields an empty value is a safe fallback, and it
        # lets us avoid changing most of the code that reads them
        'field_sa_reported_by': (
            reported_by
            if isinstance(reported_by, dict)
            else {'format': '1', 'value': ''}
        ),
        'field_sa_description': (
            description
            if isinstance(description, dict)
            else {'format': '1', 'value': ''}
        ),
    }

    return sa_advisory