@@ -134,19 +134,17 @@ pub(super) fn matches_regex(pattern: &EvalPattern, comment: &core::Comment) -> b
134134
135135pub ( super ) fn matches_severity ( pattern : & EvalPattern , comment : & core:: Comment ) -> bool {
136136 pattern. severity . as_ref ( ) . is_none_or ( |severity| {
137- comment
138- . severity
139- . to_string ( )
140- . eq_ignore_ascii_case ( severity. trim ( ) )
137+ let expected = severity. trim ( ) ;
138+ comment. severity . to_string ( ) . eq_ignore_ascii_case ( expected)
139+ || severity_rank ( comment. severity . as_str ( ) ) >= severity_rank ( expected)
141140 } )
142141}
143142
144143pub ( super ) fn matches_category ( pattern : & EvalPattern , comment : & core:: Comment ) -> bool {
145144 pattern. category . as_ref ( ) . is_none_or ( |category| {
146- comment
147- . category
148- . to_string ( )
149- . eq_ignore_ascii_case ( category. trim ( ) )
145+ let expected = category. trim ( ) ;
146+ comment. category . to_string ( ) . eq_ignore_ascii_case ( expected)
147+ || semantic_category_matches ( expected, comment)
150148 } )
151149}
152150
@@ -195,22 +193,126 @@ fn semantic_text_matches(content: &str, needle: &str) -> bool {
195193 . all ( |token| content_tokens. iter ( ) . any ( |candidate| candidate == token) )
196194}
197195
196+ fn semantic_category_matches ( expected : & str , comment : & core:: Comment ) -> bool {
197+ let expected = canonicalize_category ( expected) ;
198+ if expected. is_empty ( ) {
199+ return true ;
200+ }
201+ if canonicalize_category ( & comment. category . to_string ( ) ) == expected {
202+ return true ;
203+ }
204+
205+ let search_space = format ! (
206+ "{} {}" ,
207+ comment. content. to_ascii_lowercase( ) ,
208+ comment. tags. join( " " ) . to_ascii_lowercase( )
209+ ) ;
210+ category_aliases ( & expected)
211+ . iter ( )
212+ . any ( |alias| semantic_text_matches ( & search_space, alias) )
213+ }
214+
/// Reduces a label to its canonical form: only ASCII letters and digits are
/// kept, and letters are lowercased.
///
/// Whitespace, punctuation and non-ASCII characters are dropped, so
/// `"Best-Practice"` and `" best practice "` both become `"bestpractice"`.
fn canonicalize_category(value: &str) -> String {
    let mut canonical = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch.is_ascii_alphanumeric() {
            canonical.push(ch.to_ascii_lowercase());
        }
    }
    canonical
}
223+
/// Returns the keyword phrases associated with a canonical category name.
///
/// `expected` is compared verbatim against the lowercase, punctuation-free
/// names below (for example `"bestpractice"`); any other value yields an
/// empty slice.
fn category_aliases(expected: &str) -> &'static [&'static str] {
    const SECURITY: &[&str] = &[
        "security",
        "authorization",
        "authentication",
        "access control",
        "permission",
        "privilege escalation",
        "authorization bypass",
        "idor",
        "injection",
        "path traversal",
        "open redirect",
        "supply chain",
        "secret",
        "forbidden",
        "unauthorized",
    ];
    const BUG: &[&str] = &[
        "bug",
        "panic",
        "crash",
        "nil",
        "null",
        "fire and forget",
        "detached task",
        "background task",
        "spawned task",
        "not awaited",
        "missing await",
        "promise is always truthy",
        "swallowed error",
        "logic error",
        "race condition",
        "deadlock",
    ];
    const PERFORMANCE: &[&str] = &[
        "performance",
        "slow",
        "latency",
        "n plus one",
        "query inside loop",
        "memory leak",
    ];
    const STYLE: &[&str] = &["style", "format", "naming", "lint"];
    const DOCUMENTATION: &[&str] = &["documentation", "docstring", "docs"];
    const BEST_PRACTICE: &[&str] = &["best practice", "robustness", "guardrail"];
    const MAINTAINABILITY: &[&str] = &[
        "maintainability",
        "readability",
        "duplication",
        "complexity",
        "refactor",
    ];
    const TESTING: &[&str] = &["testing", "test coverage", "missing test"];
    const ARCHITECTURE: &[&str] = &["architecture", "design", "abstraction", "coupling"];

    match expected {
        "security" => SECURITY,
        "bug" => BUG,
        "performance" => PERFORMANCE,
        "style" => STYLE,
        "documentation" => DOCUMENTATION,
        "bestpractice" => BEST_PRACTICE,
        "maintainability" => MAINTAINABILITY,
        "testing" => TESTING,
        "architecture" => ARCHITECTURE,
        _ => &[],
    }
}
284+
198285fn canonicalize_semantic_text ( text : & str ) -> String {
199286 let mut canonical = text. to_ascii_lowercase ( ) ;
200287 for ( source, replacement) in [
201288 ( "authz" , "authorization" ) ,
202289 ( "authorisation" , "authorization" ) ,
203290 ( "access control" , "authorization" ) ,
204291 ( "broken access control" , "authorization bypass" ) ,
292+ ( "verbose-error" , "information disclosure" ) ,
293+ ( "verbose error" , "information disclosure" ) ,
294+ ( "debug-details" , "information disclosure" ) ,
295+ ( "debug details" , "information disclosure" ) ,
296+ ( "stack-trace" , "information disclosure" ) ,
297+ ( "stack trace" , "information disclosure" ) ,
298+ ( "cwe-209" , "information disclosure" ) ,
299+ ( "cwe 209" , "information disclosure" ) ,
205300 ( "piping curl output directly to bash" , "curl pipe to shell" ) ,
206301 ( "pipe curl output directly to bash" , "curl pipe to shell" ) ,
207302 (
208303 "piping remote script directly to bash" ,
209304 "curl pipe to shell" ,
210305 ) ,
211306 ( "piping a remote script to bash" , "curl pipe to shell" ) ,
307+ ( "arbitrary shell command execution" , "command injection" ) ,
308+ (
309+ "without input validation or sanitization" ,
310+ "user controlled command" ,
311+ ) ,
212312 ( "untrusted code" , "remote script" ) ,
213313 ( "attack vector" , "risk" ) ,
314+ ( "silently discarded" , "swallowed error" ) ,
315+ ( "silent failure" , "swallowed error" ) ,
214316 ( "sqli" , "sql injection" ) ,
215317 ( "xss" , "cross site scripting" ) ,
216318 ( "ssrf" , "server side request forgery" ) ,
@@ -239,6 +341,16 @@ fn canonicalize_semantic_text(text: &str) -> String {
239341 canonical
240342}
241343
344+ fn severity_rank ( value : & str ) -> usize {
345+ match canonicalize_category ( value) . as_str ( ) {
346+ "error" => 3 ,
347+ "warning" => 2 ,
348+ "suggestion" => 1 ,
349+ "info" => 0 ,
350+ _ => 0 ,
351+ }
352+ }
353+
242354fn semantic_tokens ( text : & str ) -> Vec < String > {
243355 text. split ( |ch : char | !ch. is_ascii_alphanumeric ( ) )
244356 . map ( str:: trim)
0 commit comments