@@ -84,6 +84,21 @@ def test_chimera_scan(database_chimera, mocker):
8484 )
8585
8686
def test_chimera_scan_memoryview(database_chimera, mocker):
    """Chimera scan accepts a ``memoryview`` input (buffer protocol, issue #250).

    Wraps ``b"foobar"`` in a ``memoryview``, scans it with the chimera
    database fixture, and verifies that the match handler received every
    expected call, regardless of the order in which they arrived.
    """
    on_match = mocker.Mock(return_value=None)
    payload = memoryview(b"foobar")
    expected_calls = [
        mocker.call(0, 0, 3, 0, [(1, 0, 3)], None),
        mocker.call(1, 0, 6, 0, [(1, 0, 6)], None),
        mocker.call(2, 3, 6, 0, [(1, 3, 6)], None),
    ]

    database_chimera.scan(payload, match_event_handler=on_match)

    on_match.assert_has_calls(expected_calls, any_order=True)
100+
101+
87102def test_block_scan (database_block , mocker ):
88103 callback = mocker .Mock (return_value = None )
89104
@@ -99,6 +114,38 @@ def test_block_scan(database_block, mocker):
99114 )
100115
101116
def test_block_scan_memoryview(database_block, mocker):
    """Block scan accepts a ``memoryview`` input (buffer protocol, issue #250).

    Scans ``b"foobar"`` through a ``memoryview`` and verifies that the match
    handler received every expected call, in any order.
    """
    on_match = mocker.Mock(return_value=None)
    payload = memoryview(b"foobar")
    expected_calls = [
        mocker.call(0, 0, 2, 0, None),
        mocker.call(0, 0, 3, 0, None),
        mocker.call(1, 0, 6, 0, None),
        mocker.call(2, 3, 6, 0, None),
    ]

    database_block.scan(payload, match_event_handler=on_match)

    on_match.assert_has_calls(expected_calls, any_order=True)
131+
132+
def test_block_scan_bytearray(database_block, mocker):
    """Block scan accepts a ``bytearray`` input (buffer protocol, issue #250).

    Scans a ``bytearray`` copy of ``b"foobar"`` and verifies that the match
    handler received every expected call, in any order.
    """
    on_match = mocker.Mock(return_value=None)
    payload = bytearray(b"foobar")
    expected_calls = [
        mocker.call(0, 0, 2, 0, None),
        mocker.call(0, 0, 3, 0, None),
        mocker.call(1, 0, 6, 0, None),
        mocker.call(2, 3, 6, 0, None),
    ]

    database_block.scan(payload, match_event_handler=on_match)

    on_match.assert_has_calls(expected_calls, any_order=True)
147+
148+
102149def test_stream_scan (database_stream , mocker ):
103150 callback = mocker .Mock (return_value = None )
104151
@@ -119,6 +166,42 @@ def test_stream_scan(database_stream, mocker):
119166 )
120167
121168
def test_stream_scan_memoryview(database_stream, mocker):
    """Stream scan accepts ``memoryview`` chunks (buffer protocol, issue #250).

    Feeds ``b"foo"`` and then ``b"bar"`` to one stream, each wrapped in a
    ``memoryview``, and verifies that the match handler received every
    expected call, in any order.
    """
    on_match = mocker.Mock(return_value=None)
    chunks = (memoryview(b"foo"), memoryview(b"bar"))
    expected_calls = [
        mocker.call(0, 0, 2, 0, None),
        mocker.call(0, 0, 3, 0, None),
        mocker.call(1, 0, 6, 0, None),
        mocker.call(2, 3, 6, 0, None),
    ]

    with database_stream.stream(match_event_handler=on_match) as stream:
        for chunk in chunks:
            stream.scan(chunk)

    on_match.assert_has_calls(expected_calls, any_order=True)
185+
186+
def test_stream_scan_bytearray(database_stream, mocker):
    """Stream scan accepts ``bytearray`` chunks (buffer protocol, issue #250).

    Feeds ``b"foo"`` and then ``b"bar"`` to one stream, each as a
    ``bytearray``, and verifies that the match handler received every
    expected call, in any order.
    """
    on_match = mocker.Mock(return_value=None)
    chunks = (bytearray(b"foo"), bytearray(b"bar"))
    expected_calls = [
        mocker.call(0, 0, 2, 0, None),
        mocker.call(0, 0, 3, 0, None),
        mocker.call(1, 0, 6, 0, None),
        mocker.call(2, 3, 6, 0, None),
    ]

    with database_stream.stream(match_event_handler=on_match) as stream:
        for chunk in chunks:
            stream.scan(chunk)

    on_match.assert_has_calls(expected_calls, any_order=True)
203+
204+
122205def test_vectored_scan (database_vector , mocker ):
123206 """Test vectored scanning across multiple buffers.
124207
@@ -136,8 +219,8 @@ def test_vectored_scan(database_vector, mocker):
136219 callback .assert_has_calls (
137220 [
138221 # Pattern 0 (fo+): matches in buffer 0 and buffer 1
139- mocker .call (0 , 0 , 5 , 0 , None ), # 'fo' at positions 3-4
140- mocker .call (0 , 0 , 6 , 0 , None ), # 'foo' at positions 3-5
222+ mocker .call (0 , 0 , 5 , 0 , None ), # 'fo' at positions 3-4
223+ mocker .call (0 , 0 , 6 , 0 , None ), # 'foo' at positions 3-5
141224 mocker .call (0 , 0 , 13 , 0 , None ), # 'fo' in buffer 1 at pos 11-12
142225 # Pattern 2 (BAR): matches in buffer 1 and buffer 2
143226 mocker .call (2 , 14 , 17 , 0 , None ), # 'bar' in buffer 1
@@ -334,92 +417,92 @@ def test_literal_expressions(mocker):
334417
def test_unicode_expressions():
    """Test unicode pattern compilation and scanning (issue #207).

    This test validates that Unicode patterns (Arabic/Hebrew text) compile and match
    correctly after fixing PCRE UTF-8 support in the build system.

    Background:
    The original issue was "Expression is not valid UTF-8" errors when compiling
    valid UTF-8 patterns. This was caused by PCRE being built without UTF-8 support
    in v0.7.9+ when the build system switched from setup.py to CMake.

    Note on HS_FLAG_UTF8:
    We avoid using HS_FLAG_UTF8 by default due to known Hyperscan/Vectorscan
    limitations and bugs:
    - intel/hyperscan#57: UTF-8 match failures with \\Q...\\E patterns
    - intel/hyperscan#133: Parser bug with Ragel v7 incorrectly rejecting valid UTF-8
    - intel/hyperscan#163: Performance issues with UTF-8 + case-insensitive flags

    Unicode patterns work correctly without HS_FLAG_UTF8 when PCRE has proper
    UTF-8 support, which is what our CMake fixes provide.

    The test first compiles and scans every flag-less database and asserts
    that each one reports a match. Only afterwards does it probe compilation
    with HS_FLAG_UTF8; a failure there skips the test (known limitation), but
    by then the regression checks above have already run.
    """
    complex_patterns = [
        r"<span\s+.*>السلام عليكم\s<\/span>",
        r"<span\s+.*>ועליכום הסלאם\s<\/span>",
    ]

    simple_patterns = ["السلام عليكم", "ועליכום הסلאם"]

    def collect_matches(database, data):
        """Scan *data* with *database* and return ``(id, from, to)`` per match."""
        database.scratch = hyperscan.Scratch(database)
        found = []

        def on_match(pattern_id, from_offset, to_offset, flags, context):
            found.append((pattern_id, from_offset, to_offset))
            return 0

        database.scan(data, match_event_handler=on_match)
        return found

    # The primary issue was compilation failure with "Expression is not valid
    # UTF-8"; reaching the scans below means compilation succeeded.
    db_complex = hyperscan.Database()
    db_complex.compile(expressions=complex_patterns)

    db_simple = hyperscan.Database()
    db_simple.compile(expressions=simple_patterns)

    bytes_patterns = [p.encode("utf-8") for p in simple_patterns]
    db_bytes = hyperscan.Database()
    db_bytes.compile(expressions=bytes_patterns)

    # Scan the first simple pattern against its own UTF-8 encoding. Without
    # HS_FLAG_UTF8 the pattern is matched byte-wise, so the match must end at
    # the final byte of the input.
    pattern_bytes = simple_patterns[0].encode("utf-8")  # 'السلام عليكم'

    simple_matches = collect_matches(db_simple, pattern_bytes)
    assert (0, len(pattern_bytes)) in {
        (pattern_id, to_offset) for pattern_id, _, to_offset in simple_matches
    }, "str unicode pattern should match its own UTF-8 encoding"

    bytes_matches = collect_matches(db_bytes, pattern_bytes)
    assert any(
        pattern_id == 0 for pattern_id, _, _ in bytes_matches
    ), "bytes unicode pattern should match its own UTF-8 encoding"

    # The complex (markup-wrapped) Arabic pattern must match the sample text.
    test_text = '<span class="greeting">السلام عليكم </span>'
    complex_matches = collect_matches(db_complex, test_text.encode("utf-8"))
    assert any(
        pattern_id == 0 for pattern_id, _, _ in complex_matches
    ), "complex unicode pattern should match the sample markup"

    # HS_FLAG_UTF8 is optional: probe it last so a known limitation cannot
    # hide the regression checks above behind a skip.
    db_utf8 = hyperscan.Database()
    try:
        db_utf8.compile(expressions=simple_patterns, flags=hyperscan.HS_FLAG_UTF8)
    except Exception as e:
        pytest.skip(f"HS_FLAG_UTF8 validation failed (known limitation): {e}")
0 commit comments