@@ -649,7 +649,7 @@ def test_disallow_duplicate_keys(self):
649649 ):
650650 tskit .MetadataSchema (schema )
651651
652- def test_round_trip_with_struct_and_json (self ):
652+ def schema_with_blobs (self , num_blobs ):
653653 schema = {
654654 "codec" : "json+struct" ,
655655 "json" : {
@@ -662,59 +662,36 @@ def test_round_trip_with_struct_and_json(self):
662662 },
663663 "struct" : {
664664 "type" : "object" ,
665- "properties" : {"blob" : { "type" : "integer" , "binaryFormat" : "i" } },
665+ "properties" : {},
666666 },
667667 }
668- ms = tskit .MetadataSchema (schema )
669- row = {"label" : "alpha" , "count" : 7 , "blob" : 5 }
670- encoded = ms .validate_and_encode_row (row )
671- out = ms .decode_row (encoded )
672- assert out == row
673-
674- def test_blob_bytes_aligned (self ):
675- # test that the portion of the encoded metadata up until the struct
676- # is 8-byte aligned; we do that in the pedantic way
677- # of figuring out how much memory is being used per int
678- # in the struct part and subtracting that off
679- def schema_with_blobs (k ):
680- schema = {
681- "codec" : "json+struct" ,
682- "json" : {
683- "type" : "object" ,
684- "properties" : {
685- "label" : {"type" : "string" },
686- "count" : {"type" : "number" },
687- },
688- "required" : ["label" ],
689- },
690- "struct" : {
691- "type" : "object" ,
692- "properties" : {},
693- },
668+ for j in range (num_blobs ):
669+ schema ["struct" ]["properties" ][f"b{ j } " ] = {
670+ "type" : "integer" ,
671+ "binaryFormat" : "i" ,
694672 }
695- for j in range (k ):
696- schema ["struct" ]["properties" ][f"b{ j } " ] = {
697- "type" : "integer" ,
698- "binaryFormat" : "i" ,
699- }
700- return tskit .MetadataSchema (schema )
701-
702- k_list = (0 , 1 , 2 , 3 )
703- schemas = [schema_with_blobs (k ) for k in k_list ]
704- rows = []
705- for k in k_list :
706- row = {"label" : "alpha" , "count" : 7 }
707- for j in range (k ):
708- row [f"b{ j } " ] = j
709- rows .append (row )
710- encoded = [ms .validate_and_encode_row (row ) for ms , row in zip (schemas , rows )]
711- dbytes = len (encoded [2 ]) - len (encoded [1 ])
712- assert len (encoded [3 ]) - len (encoded [2 ]) == dbytes
713- for k , en in zip (k_list , encoded ):
714- assert (len (en ) - k * dbytes ) % 8 == 0
715- for ms , en , row in zip (schemas , encoded , rows ):
716- decoded = ms .decode_row (en )
717- assert decoded == row
673+ return tskit .MetadataSchema (schema )
674+
675+ @pytest .mark .parametrize ("k" , (0 , 1 , 5 , 1001 ))
676+ def test_round_trip_with_struct_and_json (self , k ):
677+ ms = self .schema_with_blobs (k )
678+ ms0 = self .schema_with_blobs (0 )
679+ bytes_per_blob = len (struct .pack ("i" , 0 ))
680+ for s in [
681+ "" ,
682+ "abc" ,
683+ "superfragilisticexpialodocious" ,
684+ " " * 1000 + "foo" + " " * 1000 ,
685+ ]:
686+ row = {"label" : s , "count" : 7 }
687+ encoded0 = ms0 .validate_and_encode_row (row )
688+ row .update ({f"b{ j } " : j for j in range (k )})
689+ encoded = ms .validate_and_encode_row (row )
690+ out = ms .decode_row (encoded )
691+ assert out == row
692+ # validate byte alignment: the k struct ints must add exactly k * bytes_per_blob bytes, and the encoding with no struct ints (encoded0) must be a multiple of 8 bytes long
693+ assert len (encoded ) - len (encoded0 ) == k * bytes_per_blob
694+ assert len (encoded0 ) % 8 == 0
718695
719696 def test_json_defaults_applied (self ):
720697 schema = {
0 commit comments