Skip to content

Commit b163506

Browse files
committed
add generic protocol message serialize/deserialize functionality
Signed-off-by: Lance-Drane <Lance-Drane@users.noreply.github.com>
1 parent 0c6a5a5 commit b163506

2 files changed

Lines changed: 125 additions & 0 deletions

File tree

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""This module contains the binary specification for an INTERSECT message IF the protocol does not provide any built-in header support. If it does, use the built-in functionality of the protocol.
2+
3+
Some protocols do not have any built-in header support, leaving it up to us to define the binary structure of the message. We want to avoid "chunking" messages into multiple parts, so a message should be guaranteed to include all metadata.
4+
"""
5+
6+
from intersect_sdk_common import IntersectApplicationError
7+
8+
_HEADER_KV_SEPARATOR = b'\x02'
"""Indicates end of a header key and start of a header value.

Used because applications should generally assign no special meaning to this byte, and this byte has no reason to appear in header keys or values.
"""

_HEADER_VALUE_SEPARATOR = b'\x03'
"""Indicates end of a header value and start of the next header key.

Used because applications should generally assign no special meaning to this byte, and this byte has no reason to appear in header keys or values.
"""

_PAYLOAD_SEPARATOR = b'\x01'
"""Indicates end of headers and start of the payload.

Used because applications should generally assign no special meaning to this byte, and this byte has no reason to appear in header keys or values.

Only the first occurrence in a message is treated as the separator, so the payload itself may contain this byte.
"""


_TOTAL_REASONABLE_HEADER_BYTES = 131072  # 128 KiB
"""The total number of bytes that should be reasonably expected to be used for header keys, values, and header separators combined. This provides applications with some level of DoS protection.

The parser only searches this many leading bytes of a message for the payload separator.

It is rare in practice for ANY application to use this number of bytes for the total amount of headers; for example, http2_max_header_size in NGINX is rarely set above 128KB.
"""


CONTENT_TYPE_HEADER_KEY = 'content_type'
"""Reserved header key under which the content type is stored in the binary message."""

CONTENT_TYPE_HEADER_KEY_BYTES = CONTENT_TYPE_HEADER_KEY.encode()
"""Pre-encoded form of ``CONTENT_TYPE_HEADER_KEY``, computed once at import time."""
32+
33+
34+
def create_binary_message(body: bytes, content_type: str, headers: dict[str, str]) -> bytes:
    """Create a binary message from the body, headers, and content type.

    The message is laid out as a sequence of ``key <KV> value`` header fields
    joined by ``<VS>``, followed by ``<PS>`` and then the raw body, where
    ``<KV>``, ``<VS>`` and ``<PS>`` are the module's key/value, header-value
    and payload separator bytes. The content type is always written as the
    first header field, under the ``content_type`` key.

    Args:
        body: Raw payload bytes. Copied verbatim; may contain any byte value.
        content_type: Content type of the payload.
        headers: Additional header key/value pairs. A key equal to
            ``content_type`` is written after the real content type and will
            shadow it when the message is parsed, so callers should avoid it.

    Returns:
        The serialized message.

    Raises:
        ValueError: If the content type, a header key, or a header value
            contains one of the reserved separator bytes. Such a message
            could not be parsed back correctly.
    """
    # content-type 'header' first (this is generally handled separately from
    # other headers in many protocols)
    header_fields = [
        _HEADER_KV_SEPARATOR.join(
            (CONTENT_TYPE_HEADER_KEY_BYTES, _encode_header_text(content_type, 'content type'))
        )
    ]
    header_fields.extend(
        _HEADER_KV_SEPARATOR.join(
            (_encode_header_text(key, 'header key'), _encode_header_text(value, 'header value'))
        )
        for key, value in headers.items()
    )
    return b''.join(
        (
            # headers (joining handles the "no extra headers" case without a trailing separator)
            _HEADER_VALUE_SEPARATOR.join(header_fields),
            # end of headers
            _PAYLOAD_SEPARATOR,
            # body
            body,
        )
    )


def _encode_header_text(text: str, description: str) -> bytes:
    """Encode one piece of header metadata, rejecting reserved separator bytes."""
    encoded = text.encode()
    for reserved in (_HEADER_KV_SEPARATOR, _HEADER_VALUE_SEPARATOR, _PAYLOAD_SEPARATOR):
        if reserved in encoded:
            msg = f'{description} must not contain the reserved byte {reserved!r}'
            raise ValueError(msg)
    return encoded
54+
55+
56+
def parse_binary_message(message: bytes) -> tuple[bytes, str, dict[str, str]]:
    """Parse a binary message into its body, content type, and headers.

    Args:
        message: The complete serialized message.

    Returns:
        A ``(body, content_type, headers)`` tuple. ``headers`` does not
        include the ``content_type`` entry. If a header key is repeated,
        the last occurrence wins.

    Raises:
        IntersectApplicationError: If the message is malformed: no payload
            separator within the header size limit, a header field that is
            not exactly one key/value pair, header text that cannot be
            decoded, or no ``content_type`` header.
    """
    # IMPORTANT!!! ----- Total length of header keys and values combined should be limited
    # to the first several bytes; terminate the header search early if headers aren't a
    # reasonable length.
    payload_sep_location = message.find(_PAYLOAD_SEPARATOR, 0, _TOTAL_REASONABLE_HEADER_BYTES)
    if payload_sep_location == -1:
        msg = 'Probable malformed message: no payload separator found in first expected bytes, discarding it.'
        raise IntersectApplicationError(msg)

    headers: dict[str, str] = {}
    for header in message[:payload_sep_location].split(_HEADER_VALUE_SEPARATOR):
        key, kv_separator, value = header.partition(_HEADER_KV_SEPARATOR)
        # Require exactly one key/value separator per header field; anything else is
        # reported as a malformed message rather than leaking a bare unpacking ValueError.
        if not kv_separator or _HEADER_KV_SEPARATOR in value:
            msg = 'Probable malformed message: header is not a single key/value pair, discarding it.'
            raise IntersectApplicationError(msg)
        try:
            headers[key.decode()] = value.decode()
        except UnicodeDecodeError as e:
            msg = 'Probable malformed message: header could not be decoded as text, discarding it.'
            raise IntersectApplicationError(msg) from e

    try:
        content_type = headers.pop(CONTENT_TYPE_HEADER_KEY)
    except KeyError as e:
        msg = 'Probable malformed message: no content_type header found in message, discarding it.'
        raise IntersectApplicationError(msg) from e

    # Everything after the first payload separator is the body, untouched.
    return message[payload_sep_location + 1 :], content_type, headers
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import pytest
2+
3+
from intersect_sdk_common.control_plane.custom_binary_protocol import (
4+
CONTENT_TYPE_HEADER_KEY,
5+
create_binary_message,
6+
parse_binary_message,
7+
)
8+
9+
10+
@pytest.mark.parametrize(
    ('body', 'content_type', 'headers'),
    [
        (b'{"key": "value"}', 'application/json', {'header1': 'value1', 'header2': 'value2'}),
        (b'<xml><key>value</key></xml>', 'application/xml', {'header1': 'value1'}),
        (b'plain text body', 'text/plain', {}),
        (
            b'I\x01Have\x02Special\x03Characters',
            'text/plain',
            {'But headers': 'cannot have control characters'},
        ),
        # empty payload: the message ends right after the payload separator
        (b'', 'application/octet-stream', {}),
        # non-ASCII header text occupies more bytes than characters once encoded
        (b'body', 'text/plain', {'naïve': 'café'}),
    ],
)
def test_custom_binary_protocol_idempotency(
    body: bytes, content_type: str, headers: dict[str, str]
) -> None:
    """Serializing and then parsing a message returns the original body, content type, and headers."""
    message = create_binary_message(body, content_type, headers)
    parsed_body, parsed_content_type, parsed_headers = parse_binary_message(message)

    assert parsed_body == body
    assert parsed_content_type == content_type
    assert parsed_headers == headers

    # message body should be at the end of the full message
    # (endswith rather than message[-len(body):], which is the whole message for an empty body)
    assert message.endswith(body)

    # complete message should only add 2 bytes for each metadata field;
    # sizes are measured on the encoded bytes, not on str characters
    expected_length = (
        len(body)
        + len(CONTENT_TYPE_HEADER_KEY.encode())
        + len(content_type.encode())
        + 2  # header key separator and header value terminator
        + sum(
            len(key.encode()) + len(value.encode()) + 2  # header key separator and header value terminator
            for key, value in headers.items()
        )
    )
    assert len(message) == expected_length

0 commit comments

Comments
 (0)