Skip to content

Commit 676de98

Browse files
authored
Fixed rule_id being inserted into finding_id field (#1055)
* Fixed rule_id being inserted into finding_id field * Enforced finding_id in results and fixed some tests
1 parent 05bbefc commit 676de98

20 files changed

+28247
-23635
lines changed

src/codemodder/result.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,9 @@ def from_sarif(
126126
cls, sarif_result: ResultModel, sarif_run: Run, truncate_rule_id: bool = False
127127
) -> Self:
128128
rule_id = cls.extract_rule_id(sarif_result, sarif_run, truncate_rule_id)
129-
finding_id = cls.extract_finding_id(sarif_result) or rule_id
129+
finding_id = cls.extract_finding_id(sarif_result)
130+
if not finding_id:
131+
raise ValueError("Result does not have a finding_id.")
130132
return cls(
131133
rule_id=rule_id,
132134
locations=cls.extract_locations(sarif_result, sarif_run),

src/codemodder/semgrep.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import itertools
2+
import json
23
import subprocess
4+
import uuid
35
from pathlib import Path
46
from tempfile import NamedTemporaryFile
57
from typing import Iterable, Optional
@@ -90,7 +92,9 @@ def run(
9092
if not yaml_files:
9193
raise ValueError("No Semgrep rules were provided")
9294

93-
with NamedTemporaryFile(prefix="semgrep", suffix=".sarif") as temp_sarif_file:
95+
with NamedTemporaryFile(
96+
prefix="semgrep", suffix=".sarif", mode="w+"
97+
) as temp_sarif_file:
9498
command = [
9599
"semgrep",
96100
"scan",
@@ -114,6 +118,17 @@ def run(
114118
stdout=None if execution_context.verbose else subprocess.PIPE,
115119
stderr=None if execution_context.verbose else subprocess.PIPE,
116120
)
121+
# Insert guid in results
122+
temp_sarif_file.seek(0)
123+
sarif = Sarif.model_validate(json.load(temp_sarif_file))
124+
for run in sarif.runs:
125+
for result in run.results or []:
126+
if not result.guid:
127+
result.guid = uuid.uuid4()
128+
temp_sarif_file.seek(0)
129+
temp_sarif_file.write(sarif.model_dump_json(exclude_none=True, by_alias=True))
130+
temp_sarif_file.seek(0)
131+
117132
if call.returncode != 0:
118133
if not execution_context.verbose:
119134
logger.error("captured semgrep stderr: %s", call.stderr)

tests/codemods/semgrep/test_semgrep_django_secure_set_cookie.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def index(request, template):
4141
"tool": {"driver": {"name": "Semgrep OSS"}},
4242
"results": [
4343
{
44+
"guid": "ecf8007d-0eac-4151-92c7-c5dc8290f28e",
4445
"fingerprints": {"matchBasedId/v1": "123"},
4546
"locations": [
4647
{

tests/codemods/semgrep/test_semgrep_enable_jinja2_autoescape.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def test_import(self, tmpdir):
3030
"tool": {"driver": {"name": "Semgrep OSS"}},
3131
"results": [
3232
{
33+
"guid": "282ad4eb-3b68-4ee4-b8ff-f779ea14b589",
3334
"fingerprints": {"matchBasedId/v1": "123"},
3435
"locations": [
3536
{

tests/codemods/semgrep/test_semgrep_harden_pyyaml.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def test_pyyaml(self, tmpdir):
2929
"tool": {"driver": {"name": "Semgrep OSS"}},
3030
"results": [
3131
{
32+
"guid": "df15a793-eea0-4fee-a65d-8923ca058265",
3233
"fingerprints": {"matchBasedId/v1": "123"},
3334
"locations": [
3435
{
@@ -88,6 +89,7 @@ def index(request):
8889
"tool": {"driver": {"name": "Semgrep OSS"}},
8990
"results": [
9091
{
92+
"guid": "2291849a-3e04-4969-94b8-87a21e818889",
9193
"fingerprints": {"matchBasedId/v1": "123"},
9294
"locations": [
9395
{

tests/codemods/semgrep/test_semgrep_jwt_decode_verify.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def test_import(self, tmpdir):
2828
"tool": {"driver": {"name": "Semgrep OSS"}},
2929
"results": [
3030
{
31+
"guid": "3efd541b-4c31-4e7e-89f2-7fe0d7ebd468",
3132
"fingerprints": {"matchBasedId/v1": "123"},
3233
"locations": [
3334
{

tests/codemods/semgrep/test_semgrep_nan_injection.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def home(request):
4141
"tool": {"driver": {"name": "Semgrep OSS"}},
4242
"results": [
4343
{
44+
"guid": "b796b74b-275c-4785-b341-76170b43f6d4",
4445
"fingerprints": {"matchBasedId/v1": "1932"},
4546
"locations": [
4647
{
@@ -178,6 +179,7 @@ def view(request):
178179
"tool": {"driver": {"name": "Semgrep OSS"}},
179180
"results": [
180181
{
182+
"guid": "6470e0a8-2eeb-4268-8677-f96161207b40",
181183
"fingerprints": {"matchBasedId/v1": "1fdbd5a"},
182184
"locations": [
183185
{
@@ -204,6 +206,7 @@ def view(request):
204206
"ruleId": "python.django.security.nan-injection.nan-injection",
205207
},
206208
{
209+
"guid": "b3056d9a-1618-40be-bf5e-989278305cf0",
207210
"fingerprints": {"matchBasedId/v1": "1fdbd5a"},
208211
"locations": [
209212
{
@@ -230,6 +233,7 @@ def view(request):
230233
"ruleId": "python.django.security.nan-injection.nan-injection",
231234
},
232235
{
236+
"guid": "3356587c-dd3a-49e1-baee-0aafc0a91511",
233237
"fingerprints": {"matchBasedId/v1": "1fdbd5a"},
234238
"locations": [
235239
{
@@ -256,6 +260,7 @@ def view(request):
256260
"ruleId": "python.django.security.nan-injection.nan-injection",
257261
},
258262
{
263+
"guid": "626d3911-ed0b-414d-a2c9-af2245b0baee",
259264
"fingerprints": {"matchBasedId/v1": "1fdbd5a"},
260265
"locations": [
261266
{
@@ -315,6 +320,7 @@ def view(request):
315320
"tool": {"driver": {"name": "Semgrep OSS"}},
316321
"results": [
317322
{
323+
"guid": "60e089cd-472e-489e-a264-cfc6e33e651a",
318324
"fingerprints": {"matchBasedId/v1": "asdfg"},
319325
"locations": [
320326
{
@@ -373,6 +379,7 @@ def view(request):
373379
"tool": {"driver": {"name": "Semgrep OSS"}},
374380
"results": [
375381
{
382+
"guid": "014e3945-144d-4c28-960b-4dd09f2a2b8f",
376383
"fingerprints": {"matchBasedId/v1": "q324"},
377384
"locations": [
378385
{
@@ -429,6 +436,7 @@ def view(request):
429436
"tool": {"driver": {"name": "Semgrep OSS"}},
430437
"results": [
431438
{
439+
"guid": "d0540cd9-b999-4756-8392-ca2702e94438",
432440
"fingerprints": {"matchBasedId/v1": "asdtg"},
433441
"locations": [
434442
{
@@ -487,6 +495,7 @@ def view(request):
487495
"tool": {"driver": {"name": "Semgrep OSS"}},
488496
"results": [
489497
{
498+
"guid": "33ffdd04-0f27-475c-8c11-2405e9b77526",
490499
"fingerprints": {"matchBasedId/v1": "asd2"},
491500
"locations": [
492501
{

tests/codemods/semgrep/test_semgrep_no_csrf_exempt.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def foo():
7979
"tool": {"driver": {"name": "Semgrep OSS"}},
8080
"results": [
8181
{
82+
"guid": "8a8007b3-404d-4107-9e0d-4bb11536b78c",
8283
"fingerprints": {"matchBasedId/v1": "a3ca2"},
8384
"locations": [
8485
{
@@ -105,6 +106,7 @@ def foo():
105106
"ruleId": "python.django.security.audit.csrf-exempt.no-csrf-exempt",
106107
},
107108
{
109+
"guid": "71260758-6dee-4c96-a4e3-22b143b2633e",
108110
"fingerprints": {"matchBasedId/v1": "1cc62"},
109111
"locations": [
110112
{

tests/codemods/semgrep/test_semgrep_rsa_key_size.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def _run_and_assert_with_results(self, tmpdir, input_code, expected_output):
1818
"tool": {"driver": {"name": "Semgrep OSS"}},
1919
"results": [
2020
{
21+
"guid": "55b6e8fa-8e41-4470-b887-05c02a5e1196",
2122
"fingerprints": {"matchBasedId/v1": "123"},
2223
"locations": [
2324
{

tests/codemods/semgrep/test_semgrep_sql_parametrization.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def f():
8383
"tool": {"driver": {"name": "Semgrep OSS"}},
8484
"results": [
8585
{
86+
"guid": "2273caac-50fa-409d-97e8-a39219eb9afe",
8687
"fingerprints": {"matchBasedId/v1": "123"},
8788
"locations": [
8889
{
@@ -110,6 +111,7 @@ def f():
110111
"ruleId": "python.django.security.injection.sql.sql-injection-using-db-cursor-execute.sql-injection-db-cursor-execute",
111112
},
112113
{
114+
"guid": "8e8fed36-7e72-4b1f-ad49-2e1a50587595",
113115
"fingerprints": {"matchBasedId/v1": "123"},
114116
"locations": [
115117
{
@@ -137,6 +139,7 @@ def f():
137139
"ruleId": "python.lang.security.audit.formatted-sql-query.formatted-sql-query",
138140
},
139141
{
142+
"guid": "300df87d-e713-4cd4-a245-d64f25be03de",
140143
"fingerprints": {"matchBasedId/v1": "123"},
141144
"locations": [
142145
{
@@ -164,6 +167,7 @@ def f():
164167
"ruleId": "python.sqlalchemy.security.sqlalchemy-execute-raw-query.sqlalchemy-execute-raw-query",
165168
},
166169
{
170+
"guid": "24695222-6db9-4e12-8555-c5e74eb7fe0f",
167171
"fingerprints": {"matchBasedId/v1": "123"},
168172
"locations": [
169173
{
@@ -191,6 +195,7 @@ def f():
191195
"ruleId": "python.django.security.injection.tainted-sql-string.tainted-sql-string",
192196
},
193197
{
198+
"guid": "c8355088-665d-4fe1-8790-725964ba0769",
194199
"fingerprints": {"matchBasedId/v1": "123"},
195200
"locations": [
196201
{

0 commit comments

Comments
 (0)