Skip to content

Commit 5363644

Browse files
committed
better testing
1 parent 284c660 commit 5363644

2 files changed

Lines changed: 31 additions & 52 deletions

File tree

python/tests/test_metadata.py

Lines changed: 28 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ def test_disallow_duplicate_keys(self):
649649
):
650650
tskit.MetadataSchema(schema)
651651

652-
def test_round_trip_with_struct_and_json(self):
652+
def schema_with_blobs(self, num_blobs):
653653
schema = {
654654
"codec": "json+struct",
655655
"json": {
@@ -662,59 +662,36 @@ def test_round_trip_with_struct_and_json(self):
662662
},
663663
"struct": {
664664
"type": "object",
665-
"properties": {"blob": {"type": "integer", "binaryFormat": "i"}},
665+
"properties": {},
666666
},
667667
}
668-
ms = tskit.MetadataSchema(schema)
669-
row = {"label": "alpha", "count": 7, "blob": 5}
670-
encoded = ms.validate_and_encode_row(row)
671-
out = ms.decode_row(encoded)
672-
assert out == row
673-
674-
def test_blob_bytes_aligned(self):
675-
# test that the portion of the encoded metadata up until the struct
676-
# is 8-byte aligned; we do that in the pedantic way
677-
# of figuring out how much memory is being used per int
678-
# in the struct part and subtracting that off
679-
def schema_with_blobs(k):
680-
schema = {
681-
"codec": "json+struct",
682-
"json": {
683-
"type": "object",
684-
"properties": {
685-
"label": {"type": "string"},
686-
"count": {"type": "number"},
687-
},
688-
"required": ["label"],
689-
},
690-
"struct": {
691-
"type": "object",
692-
"properties": {},
693-
},
668+
for j in range(num_blobs):
669+
schema["struct"]["properties"][f"b{j}"] = {
670+
"type": "integer",
671+
"binaryFormat": "i",
694672
}
695-
for j in range(k):
696-
schema["struct"]["properties"][f"b{j}"] = {
697-
"type": "integer",
698-
"binaryFormat": "i",
699-
}
700-
return tskit.MetadataSchema(schema)
701-
702-
k_list = (0, 1, 2, 3)
703-
schemas = [schema_with_blobs(k) for k in k_list]
704-
rows = []
705-
for k in k_list:
706-
row = {"label": "alpha", "count": 7}
707-
for j in range(k):
708-
row[f"b{j}"] = j
709-
rows.append(row)
710-
encoded = [ms.validate_and_encode_row(row) for ms, row in zip(schemas, rows)]
711-
dbytes = len(encoded[2]) - len(encoded[1])
712-
assert len(encoded[3]) - len(encoded[2]) == dbytes
713-
for k, en in zip(k_list, encoded):
714-
assert (len(en) - k * dbytes) % 8 == 0
715-
for ms, en, row in zip(schemas, encoded, rows):
716-
decoded = ms.decode_row(en)
717-
assert decoded == row
673+
return tskit.MetadataSchema(schema)
674+
675+
@pytest.mark.parametrize("k", (0, 1, 5, 1001))
676+
def test_round_trip_with_struct_and_json(self, k):
677+
ms = self.schema_with_blobs(k)
678+
ms0 = self.schema_with_blobs(0)
679+
bytes_per_blob = len(struct.pack("i", 0))
680+
for s in [
681+
"",
682+
"abc",
683+
"superfragilisticexpialodocious",
684+
" " * 1000 + "foo" + " " * 1000,
685+
]:
686+
row = {"label": s, "count": 7}
687+
encoded0 = ms0.validate_and_encode_row(row)
688+
row.update({f"b{j}": j for j in range(k)})
689+
encoded = ms.validate_and_encode_row(row)
690+
out = ms.decode_row(encoded)
691+
assert out == row
692+
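# validate byte alignment: the struct part adds exactly k packed ints, and the encoding with no struct fields is a multiple of 8 bytes long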
693+
assert len(encoded) - len(encoded0) == k * bytes_per_blob
694+
assert len(encoded0) % 8 == 0
718695

719696
def test_json_defaults_applied(self):
720697
schema = {

python/tskit/metadata.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,9 @@ def decode(self, encoded: bytes) -> Any:
318318
"Invalid json+struct payload: declared lengths exceed buffer size"
319319
)
320320
json_bytes = encoded[start : start + jlen]
321-
blob_bytes = encoded[start + jlen : start + jlen + blen + padding_length]
321+
blob_bytes = encoded[
322+
start + jlen + padding_length : start + jlen + padding_length + blen
323+
]
322324
json_data = self.json_codec.decode(json_bytes)
323325
struct_data = self.struct_codec.decode(blob_bytes)
324326
overlap = set(json_data).intersection(struct_data)

0 commit comments

Comments
 (0)