Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
58f0dd9
Add OWASP Top 10 and API importer support
Bornunique911 Mar 24, 2026
7218432
Fix cheat sheet parser test expectations on importer branches
Bornunique911 Apr 1, 2026
616e9f4
Use official OWASP cheat sheet URLs in importer branches
Bornunique911 Apr 1, 2026
9951332
Add OWASP AI resource importer support
Bornunique911 Mar 24, 2026
ca0071a
Add OWASP Kubernetes importer support
Bornunique911 Mar 24, 2026
861e843
Normalize OWASP cheat sheet references
Bornunique911 Mar 24, 2026
43367bf
Add refresh scripts for OWASP resources
Bornunique911 Mar 24, 2026
336be1a
Retry transient failures during upstream sync
Bornunique911 Mar 24, 2026
a20319d
Add file parsing function to convert YAML/JSON documents to defs.Docu…
Bornunique911 Apr 23, 2026
b0b40ed
Add OWASP Top 10 and API importer support
Bornunique911 Mar 24, 2026
18f2ef7
Fix cheat sheet parser test expectations on importer branches
Bornunique911 Apr 1, 2026
e4df9ae
Use official OWASP cheat sheet URLs in importer branches
Bornunique911 Apr 1, 2026
60cd2b0
Add OWASP AI resource importer support
Bornunique911 Mar 24, 2026
5632dc9
Add OWASP Kubernetes importer support
Bornunique911 Mar 24, 2026
8f022f5
Normalize OWASP cheat sheet references
Bornunique911 Mar 24, 2026
d547e99
Add refresh scripts for OWASP resources
Bornunique911 Mar 24, 2026
8a19464
Refactor links definition in TestCheatsheetsParser for clarity
Bornunique911 Apr 30, 2026
c007b65
Merge branch 'review/issue-471-refresh-scripts' into review/issue-471…
Bornunique911 May 6, 2026
9a073c6
Improved boilerplate, github link scrolling to readme, and completing…
Bornunique911 May 2, 2026
2733d10
Fix cheat sheet parser test expectations on importer branches
Bornunique911 Apr 1, 2026
7ea289c
Use official OWASP cheat sheet URLs in importer branches
Bornunique911 Apr 1, 2026
dec11c4
Normalize OWASP cheat sheet references
Bornunique911 Mar 24, 2026
bfd1126
Merge branch 'main' into review/issue-471-upstream-sync-retries
Bornunique911 May 6, 2026
d378a40
Fixed black formatting issue
Bornunique911 May 6, 2026
e3b19f7
Merge branch 'main' into review/issue-471-upstream-sync-retries
Bornunique911 May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 122 additions & 18 deletions application/cmd/cre_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,51 @@
app = None


def fetch_upstream_json(
    path: str,
    timeout: Optional[float] = None,
    max_attempts: Optional[int] = None,
    backoff_seconds: Optional[float] = None,
) -> Dict[str, Any]:
    """Fetch a JSON document from the upstream opencre API, retrying transient failures.

    Args:
        path: URL path appended to the upstream base URL (e.g. "/root_cres").
        timeout: per-request timeout in seconds; defaults to the
            CRE_UPSTREAM_TIMEOUT_SECONDS env var (fallback 30).
        max_attempts: total number of attempts; defaults to
            CRE_UPSTREAM_MAX_ATTEMPTS (fallback 4).
        backoff_seconds: base of the linear backoff between attempts; defaults
            to CRE_UPSTREAM_RETRY_BACKOFF_SECONDS (fallback 2).

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        RuntimeError: immediately on a non-retriable HTTP status, or after all
            attempts are exhausted; the last underlying error is chained.
    """
    base_url = os.environ.get("CRE_UPSTREAM_API_URL", "https://opencre.org/rest/v1")
    # Use `is None` checks (not `or`) so explicitly-passed zero values, such as
    # backoff_seconds=0 in tests, are not silently replaced by env defaults.
    if timeout is None:
        timeout = float(os.environ.get("CRE_UPSTREAM_TIMEOUT_SECONDS", "30"))
    if max_attempts is None:
        max_attempts = int(os.environ.get("CRE_UPSTREAM_MAX_ATTEMPTS", "4"))
    if backoff_seconds is None:
        backoff_seconds = float(
            os.environ.get("CRE_UPSTREAM_RETRY_BACKOFF_SECONDS", "2")
        )
    url = f"{base_url}{path}"
    last_error: Optional[Exception] = None

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code == 200:
                return response.json()

            status_error = RuntimeError(
                f"cannot connect to upstream status code {response.status_code}"
            )
            # Retry only on transient upstream failures (5xx and 429); any
            # other status is treated as permanent and raised immediately.
            if response.status_code < 500 and response.status_code != 429:
                raise status_error
            last_error = status_error
        except requests.exceptions.RequestException as exc:
            # Network-level failures (DNS, connection reset, timeout) are
            # considered transient and retried.
            last_error = exc

        if attempt < max_attempts:
            logger.warning(
                "upstream fetch failed for %s on attempt %s/%s, retrying",
                url,
                attempt,
                max_attempts,
            )
            # Linear backoff: wait longer after each failed attempt.
            time.sleep(backoff_seconds * attempt)

    if last_error:
        raise RuntimeError(f"upstream fetch failed for {url}") from last_error
    raise RuntimeError(f"upstream fetch failed for {url}")


def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
"""
for each link find if either the root node or the link have a CRE,
Expand Down Expand Up @@ -355,6 +400,8 @@ def register_standard(
):
if os.environ.get("CRE_NO_GEN_EMBEDDINGS") == "1":
generate_embeddings = False
if os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"):
calculate_gap_analysis = False

if not standard_entries:
logger.warning("register_standard() called with no standard_entries")
Expand Down Expand Up @@ -591,15 +638,7 @@ def download_graph_from_upstream(cache: str) -> None:
collection = db_connect(path=cache).with_graph()

def download_cre_from_upstream(creid: str):
cre_response = requests.get(
os.environ.get("CRE_UPSTREAM_API_URL", "https://opencre.org/rest/v1")
+ f"/id/{creid}"
)
if cre_response.status_code != 200:
raise RuntimeError(
f"cannot connect to upstream status code {cre_response.status_code}"
)
data = cre_response.json()
data = fetch_upstream_json(f"/id/{creid}")
credict = data["data"]
cre = defs.Document.from_dict(credict)
if cre.id in imported_cres:
Expand All @@ -611,15 +650,7 @@ def download_cre_from_upstream(creid: str):
if link.document.doctype == defs.Credoctypes.CRE:
download_cre_from_upstream(link.document.id)

root_cres_response = requests.get(
os.environ.get("CRE_UPSTREAM_API_URL", "https://opencre.org/rest/v1")
+ "/root_cres"
)
if root_cres_response.status_code != 200:
raise RuntimeError(
f"cannot connect to upstream status code {root_cres_response.status_code}"
)
data = root_cres_response.json()
data = fetch_upstream_json("/root_cres")
for root_cre in data["data"]:
cre = defs.Document.from_dict(root_cre)
register_cre(cre, collection)
Expand Down Expand Up @@ -909,6 +940,54 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover
BaseParser().register_resource(
secure_headers.SecureHeaders, db_connection_str=args.cache_file
)
if args.owasp_top10_2025_in:
from application.utils.external_project_parsers.parsers import owasp_top10_2025

BaseParser().register_resource(
owasp_top10_2025.OwaspTop10_2025, db_connection_str=args.cache_file
)
if args.owasp_api_top10_2023_in:
from application.utils.external_project_parsers.parsers import (
owasp_api_top10_2023,
)

BaseParser().register_resource(
owasp_api_top10_2023.OwaspApiTop10_2023,
db_connection_str=args.cache_file,
)
if args.owasp_kubernetes_top10_2022_in:
from application.utils.external_project_parsers.parsers import (
owasp_kubernetes_top10_2022,
)

BaseParser().register_resource(
owasp_kubernetes_top10_2022.OwaspKubernetesTop10_2022,
db_connection_str=args.cache_file,
)
if args.owasp_kubernetes_top10_2025_in:
from application.utils.external_project_parsers.parsers import (
owasp_kubernetes_top10_2025,
)

BaseParser().register_resource(
owasp_kubernetes_top10_2025.OwaspKubernetesTop10_2025,
db_connection_str=args.cache_file,
)
if args.owasp_llm_top10_2025_in:
from application.utils.external_project_parsers.parsers import (
owasp_llm_top10_2025,
)

BaseParser().register_resource(
owasp_llm_top10_2025.OwaspLlmTop10_2025,
db_connection_str=args.cache_file,
)
if args.owasp_aisvs_in:
from application.utils.external_project_parsers.parsers import owasp_aisvs

BaseParser().register_resource(
owasp_aisvs.OwaspAisvs, db_connection_str=args.cache_file
)
if args.pci_dss_4_in:
from application.utils.external_project_parsers.parsers import pci_dss

Expand Down Expand Up @@ -1008,6 +1087,31 @@ def generate_embeddings(db_url: str) -> None:
prompt_client.PromptHandler(database, load_all_embeddings=True)


def parse_file(
    filename: str, yamldocs: List[Any], scollection
) -> Optional[List[defs.Document]]:
    """Parse a list of dictionaries (YAML/JSON documents) into defs.Document objects.

    Args:
        filename: name of the originating file, used only for log messages.
        yamldocs: the decoded YAML/JSON documents; every element must be a dict.
        scollection: unused here; kept for interface compatibility with callers.

    Returns:
        The parsed documents, or None (after logging a critical error) if any
        element is not a dict.
    """
    if not all(isinstance(doc, dict) for doc in yamldocs):
        logger.critical("Malformed file %s, skipping", filename)
        return None

    def normalize_links(doc: dict) -> dict:
        """Return a copy of *doc* whose link dicts use 'ltype' instead of 'type'.

        Builds new dicts throughout rather than mutating in place: the previous
        implementation's shallow `dict(doc)` copy still popped "type" out of
        nested link dicts shared with the caller's input, mutating it.
        """
        normalized = dict(doc)
        links = normalized.get("links")
        if links is not None:
            new_links = []
            for link in links:
                new_link = dict(link)
                if "type" in new_link and "ltype" not in new_link:
                    new_link["ltype"] = new_link.pop("type")
                # Recursively normalize nested documents (if any).
                if isinstance(new_link.get("document"), dict):
                    new_link["document"] = normalize_links(new_link["document"])
                new_links.append(new_link)
            normalized["links"] = new_links
        return normalized

    return [defs.Document.from_dict(normalize_links(doc)) for doc in yamldocs]


def regenerate_embeddings(db_url: str) -> None:
"""Wipe all embedding rows, then rebuild (CRE + every node type) like ``--generate_embeddings``."""
database = db_connect(path=db_url)
Expand Down
56 changes: 49 additions & 7 deletions application/tests/cheatsheets_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,21 @@ class Repo:
repo.working_dir = loc
cre = defs.CRE(name="blah", id="223-780")
self.collection.add_cre(cre)
with open(os.path.join(os.path.join(loc, "cheatsheets"), "cs.md"), "w") as mdf:
mdf.write(cs)
with open(
os.path.join(
os.path.join(loc, "cheatsheets"),
"Secrets_Management_Cheat_Sheet.md",
),
"w",
) as mdf:
with open(
os.path.join(
os.path.join(loc, "cheatsheets"),
"Secrets_Management_Cheat_Sheet.md",
),
"w",
) as mdf:
mdf.write(cs)
mock_clone.return_value = repo
entries = cheatsheets_parser.Cheatsheets().parse(
cache=self.collection, ph=PromptHandler(database=self.collection)
Expand All @@ -45,22 +58,51 @@ class Repo:
# verify the external tagging convention, not just enum wiring.
expected = defs.Standard(
name="OWASP Cheat Sheets",
hyperlink="https://github.com/foo/bar/tree/master/cs.md",
hyperlink="https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html",
section="Secrets Management Cheat Sheet",
links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)],
links=[defs.Link(document=cre, ltype=defs.LinkTypes.AutomaticallyLinkedTo)],
tags=[
"family:guidance",
"subtype:cheatsheet",
"source:owasp_cheatsheets",
"audience:developer",
"maturity:stable",
"source:owasp_cheatsheets",
],
)
self.maxDiff = None
for name, nodes in entries.results.items():
self.assertEqual(name, parser.name)
self.assertEqual(len(nodes), 1)
self.assertCountEqual(expected.todict(), nodes[0].todict())
sections = {node.section for node in nodes}
self.assertIn("Secrets Management Cheat Sheet", sections)
secret_entry = [
node
for node in nodes
if node.section == "Secrets Management Cheat Sheet"
][0]
self.assertEqual(expected.todict(), secret_entry.todict())

def test_register_supplemental_cheatsheets(self) -> None:
    """Supplemental cheat sheets get official URLs and links to the seeded CREs."""
    seed_cres = {
        "118-110": "API/web services",
        "724-770": "Technical application access control",
        "623-550": "Denial Of Service protection",
    }
    for cre_id, cre_name in seed_cres.items():
        self.collection.add_cre(defs.CRE(name=cre_name, id=cre_id))

    entries = cheatsheets_parser.Cheatsheets().register_supplemental_cheatsheets(
        cache=self.collection
    )
    rest_entries = [
        entry for entry in entries if entry.section == "REST Security Cheat Sheet"
    ]
    rest = rest_entries[0]
    self.assertEqual(
        "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html",
        rest.hyperlink,
    )
    self.assertEqual(
        ["118-110", "724-770", "623-550"],
        [link.document.id for link in rest.links],
    )

cheatsheets_md = """ # Secrets Management Cheat Sheet

Expand Down
108 changes: 108 additions & 0 deletions application/tests/cre_main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from unittest import mock
from unittest.mock import Mock, patch
from rq import Queue, job
import requests
from rq import Queue, job
from application.utils import redis
from application.prompt_client import prompt_client as prompt_client
from application.tests.utils import data_gen
Expand Down Expand Up @@ -470,6 +472,112 @@ def test_register_cre(self) -> None:
],
)

@patch("application.cmd.cre_main.time.sleep")
@patch("application.cmd.cre_main.requests.get")
def test_fetch_upstream_json_retries_transient_failures(
    self, mock_get, mock_sleep
) -> None:
    """A transient network error is retried and the subsequent success is returned."""
    ok = Mock()
    ok.status_code = 200
    ok.json.return_value = {"data": []}
    # First call fails at the network level, second call succeeds.
    mock_get.side_effect = [
        requests.exceptions.ConnectionError("reset by peer"),
        ok,
    ]

    data = main.fetch_upstream_json("/root_cres")

    self.assertEqual({"data": []}, data)
    self.assertEqual(2, mock_get.call_count)
    mock_sleep.assert_called_once()

def test_parse_file(self) -> None:
    # Fixture: two raw CRE dicts as they would appear in a YAML/JSON file.
    # The links deliberately use the legacy "type" key, which parse_file
    # must normalize to "ltype".
    file: List[Dict[str, Any]] = [
        {
            "description": "Verify that approved cryptographic algorithms are used in the generation, seeding, and verification.",
            "doctype": defs.Credoctypes.CRE,
            "id": "157-573",
            "links": [
                {
                    "type": defs.LinkTypes.LinkedTo,
                    "document": {
                        "doctype": defs.Credoctypes.Standard,
                        "name": "TOP10",
                        "section": "https://owasp.org/www-project-top-ten/2017/A5_2017-Broken_Access_Control",
                    },
                },
                {
                    "type": defs.LinkTypes.LinkedTo,
                    "document": {
                        "doctype": defs.Credoctypes.Standard,
                        "name": "ISO 25010",
                        "section": "Secure data storage",
                    },
                },
            ],
            "name": "CREDENTIALS_MANAGEMENT_CRYPTOGRAPHIC_DIRECTIVES",
        },
        {
            "description": "Desc",
            "doctype": defs.Credoctypes.CRE,
            "id": "141-141",
            "name": "name",
        },
    ]
    # Expected parsed documents: the "type" keys above become Link.ltype.
    expected = [
        defs.CRE(
            doctype=defs.Credoctypes.CRE,
            id="157-573",
            description="Verify that approved cryptographic algorithms are used in the generation, seeding, and verification.",
            name="CREDENTIALS_MANAGEMENT_CRYPTOGRAPHIC_DIRECTIVES",
            links=[
                defs.Link(
                    document=defs.Standard(
                        doctype=defs.Credoctypes.Standard,
                        name="TOP10",
                        section="https://owasp.org/www-project-top-ten/2017/A5_2017-Broken_Access_Control",
                    ),
                    ltype=defs.LinkTypes.LinkedTo,
                ),
                defs.Link(
                    document=defs.Standard(
                        doctype=defs.Credoctypes.Standard,
                        name="ISO 25010",
                        section="Secure data storage",
                    ),
                    ltype=defs.LinkTypes.LinkedTo,
                ),
            ],
        ),
        defs.CRE(id="141-141", description="Desc", name="name"),
    ]
    with self.assertLogs("application.cmd.cre_main", level=logging.FATAL) as logs:
        # negative test first parse_file accepts a list of objects
        result = main.parse_file(
            filename="tests",
            yamldocs=[
                "no",
                "valid",
                "objects",
                "here",
                {
                    "1": 2,
                },
            ],
            scollection=self.collection,
        )

    # Non-dict elements must abort parsing and log a critical message.
    self.assertEqual(result, None)
    self.assertIn(
        "CRITICAL:application.cmd.cre_main:Malformed file tests, skipping",
        logs.output,
    )

    self.maxDiff = None

    # Positive path: well-formed dicts parse into the expected documents.
    res = main.parse_file(
        filename="tests", yamldocs=file, scollection=self.collection
    )
    self.assertCountEqual(res, expected)

@patch.object(main, "db_connect")
@patch.object(Queue, "enqueue_call")
@patch.object(redis, "wait_for_jobs")
Expand Down
Loading
Loading