|
| 1 | +'''Implements low-level json pointer parsing. See `RFC 6901 Section 4 <https://www.rfc-editor.org/rfc/rfc6901#section-4>`_ for the |
| 2 | +specification that this parser adheres to. |
| 3 | +''' |
| 4 | + |
1 | 5 | import re |
2 | 6 | from typing import * |
3 | 7 |
|
|
# Matches a `~` that is not immediately followed by `0` or `1`, including a
# `~` at the very end of the string. The whole match is the offending escape.
RE_INVALID_ESCAPE = re.compile(r"(~[^01]|~$)")
7 | 11 |
|
8 | 12 |
|
def validate(pointer: str) -> None:
    '''Validate that a string is a well formed json pointer.

    A pointer is accepted when it is empty or starts with ``/``, and contains
    no ``~`` other than as part of a ``~0`` or ``~1`` escape.

    :param pointer: The json pointer string to check.
    :raises: :exc:`.ParseException`: If json pointer is invalid.

    >>> validate('')
    >>> validate('foo')  # parts must lead with '/'
    Traceback (most recent call last):
      ...
    fast_json_pointer.exceptions.ParseException: JSON pointers must be empty or start with '/'
    >>> validate('/foo~')  # ~ must be followed by either 0 or 1
    Traceback (most recent call last):
      ...
    fast_json_pointer.exceptions.ParseException: Found invalid escape ~
    >>> validate('/~2/foo')  # only ~0, ~1 are valid escapes
    Traceback (most recent call last):
      ...
    fast_json_pointer.exceptions.ParseException: Found invalid escape ~2
    '''
    # The empty pointer is valid; any other pointer has to begin with the
    # '/' that introduces its first part.
    if pointer and not pointer.startswith("/"):
        raise ParseException("JSON pointers must be empty or start with '/'")

    # Report the first offending escape sequence verbatim.
    if match := RE_INVALID_ESCAPE.search(pointer):
        raise ParseException(f"Found invalid escape {match.group()}")
12 | 38 |
|
13 | 39 |
|
def parse(pointer: str) -> list[str]:
    r'''Parse a json pointer into a list of unescaped path parts.

    :param pointer: The json pointer string to parse.
    :returns: The unescaped path parts, in order. Empty for the empty pointer.
    :raises: :exc:`.ParseException`: If json pointer is invalid.

    >>> parse('')  # empty string is "the whole json object"
    []
    >>> parse('/')  # keys can be zero-length strings
    ['']
    >>> parse('/ // ')  # which can look funky
    [' ', '', ' ']
    >>> parse('/foo/m~0n/a~1b')  # ~1 escapes /, ~0 escapes ~
    ['foo', 'm~n', 'a/b']
    >>> parse('/c%d/e^f')  # funky symbols are fine too!
    ['c%d', 'e^f']
    >>> parse(r'/i\\j/g|h/k\l')  # r-string avoids escaping backslashes
    ['i\\\\j', 'g|h', 'k\\l']
    '''
    validate(pointer)

    # validate() guarantees a non-empty pointer starts with "/", so
    # "/foo/bar".split("/") is ["", "foo", "bar"]; slice off that leading "".
    # For the empty pointer, "".split("/") is [""], which slices down to [].
    return [unescape(p) for p in pointer.split("/")[1:]]
22 | 64 |
|
23 | 65 |
|
def unparse(parts: Iterable[str]) -> str:
    r'''Combine an iterable of unescaped path parts into a json pointer.

    :param parts: The unescaped path parts, in order.
    :returns: The json pointer string. Empty when ``parts`` is empty.

    >>> unparse([])
    ''
    >>> unparse([''])
    '/'
    >>> unparse([' ', '', ' '])
    '/ // '
    >>> unparse(['foo', 'm~n', 'a/b'])
    '/foo/m~0n/a~1b'
    >>> unparse(['c%d', 'e^f'])
    '/c%d/e^f'
    >>> unparse([r'i\\j', 'g|h', r'k\l'])
    '/i\\\\j/g|h/k\\l'
    '''
    # Prefix every part with "/" instead of joining on it, so that no parts
    # yields '' and a single empty part yields '/'.
    return "".join("/" + escape(part) for part in parts)
26 | 83 |
|
27 | 84 |
|
def escape(part: str) -> str:
    '''Escape a path part.

    :param part: An unescaped path part.
    :returns: The part with ``~`` replaced by ``~0`` and ``/`` replaced by ``~1``.

    >>> escape("foo")
    'foo'
    >>> escape("m~/0")
    'm~0~10'
    '''
    # Escape `~` first! https://www.rfc-editor.org/rfc/rfc6901#section-4
    # Doing `/` first would produce `~1`, whose `~` would then be escaped again.
    return part.replace("~", "~0").replace("/", "~1")
31 | 95 |
|
32 | 96 |
|
def unescape(part: str) -> str:
    '''Unescape a path part.

    :param part: An escaped path part.
    :returns: The part with ``~1`` replaced by ``/`` and ``~0`` replaced by ``~``.

    >>> unescape("foo")
    'foo'
    >>> unescape("m~0~10")
    'm~/0'
    '''
    # Unescape `~` last! https://www.rfc-editor.org/rfc/rfc6901#section-4
    # Doing `~0` first would turn `~01` into `~1` and then into `/`.
    return part.replace("~1", "/").replace("~0", "~")
0 commit comments