Skip to content

Commit 7c3363d

Browse files
committed
shortener WIP
1 parent f1fde6e commit 7c3363d

File tree

2 files changed

+108
-23
lines changed

2 files changed

+108
-23
lines changed

links/shortener.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
## Targets in memory
2323
2424
The `targets` dict maps target URLs to short paths.
25-
It's also loaded from data in the `.htaccess` file,
25+
It's also computed from data in the `.htaccess` file,
2626
but the algorithm is more complicated.
2727
2828
The same target URL can be mapped to multiple short paths
29-
in `.htaccess` when the same target URL was added more
29+
in `.htaccess` if the same target URL was added more
3030
than once with different short paths by mistake.
3131
We cannot fix these mistakes because the redundant
3232
short paths are printed in Fluent Python Second Edition.
@@ -57,6 +57,13 @@
5757

5858
import itertools
5959
from collections.abc import Iterable, Iterator
60+
from typing import NamedTuple
61+
62+
63+
class ShortenResult(NamedTuple):
    """Outcome of one call to `shorten_one`."""

    # target URL that was passed in
    url: str
    # short path mapped to that URL
    path: str
    # True when the path was just allocated, False when it already existed
    new: bool
6067

6168

6269
def parse_htaccess(text: str) -> Iterator[tuple[str, str]]:
@@ -95,6 +102,22 @@ def load_redirects(pairs: Iterable[tuple[str, str]]) -> tuple[dict, dict]:
95102
return redirects, targets
96103

97104

105+
# Falsy default used by `shorten_one` when looking up a target in `targets`,
# so that a target without a short path fails the truthiness check.
NO_PATH = ''
106+
107+
def shorten_one(
    target: str,
    path_gen: Iterator[str],
    redirects: dict,
    targets: dict,
) -> ShortenResult:
    """Return the short path for `target`, allocating a new one if needed.

    When `targets` already holds a non-empty path for `target`, that path
    is returned with `new=False` and neither dict is touched.
    Otherwise the next value of `path_gen` becomes the path: it is stored
    in `redirects` (path -> target) and in `targets` (target -> path),
    and the result carries `new=True`.

    Both dicts are updated in place; `path_gen` is advanced only when a
    new path is required.
    """
    existing = targets.get(target, NO_PATH)
    if existing:
        return ShortenResult(url=target, path=existing, new=False)

    fresh = next(path_gen)
    redirects[fresh] = target
    targets[target] = fresh
    return ShortenResult(url=target, path=fresh, new=True)
114+
115+
116+
def update_htaccess(new_targets: list[ShortenResult]):
    """Placeholder: not implemented yet; does nothing and returns None.

    NOTE(review): presumably this will write the newly shortened targets
    back to the `.htaccess` file -- TODO confirm once implemented.
    """
119+
120+
98121
SDIGITS = '23456789abcdefghjkmnpqrstvwxyz'
99122

100123

links/test_shortener.py

Lines changed: 83 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,33 @@
11
from pytest import mark
22

33
from shortener import parse_htaccess, choose, load_redirects
4-
from shortener import gen_short, gen_unused_short
4+
from shortener import gen_short, gen_unused_short, shorten_one, ShortenResult
5+
6+
7+
SAMPLE_HTACCESS = """
8+
ErrorDocument 404 /404.html
9+
10+
# main resources
11+
RedirectTemp /book https://www.oreilly.com/.../9781492056348/
12+
RedirectTemp /home https://www.fluentpython.com/ # extra content site
13+
14+
# duplicate targets
15+
RedirectTemp /1-20 https://www.fluentpython.com/
16+
RedirectTemp /ora https://www.oreilly.com/.../9781492056348/
17+
RedirectTemp /2-10 http://example.com/
18+
RedirectTemp /10-2 http://example.com/
19+
20+
# shortened
21+
RedirectTemp /22 http://firstshortened.co
22+
"""
23+
24+
FROZEN_TIME = '2025-06-07 01:02:03'
25+
26+
UPDATED_SAMPLE_HTACCESS = SAMPLE_HTACCESS + f"""
27+
# appended: {FROZEN_TIME}
28+
RedirectTemp /23 https://new.site/
29+
RedirectTemp /24 https://other.new.site/
30+
"""
531

632

733
PARSED_SAMPLE_HTACCESS = [
@@ -11,13 +37,32 @@
1137
('ora', 'https://www.oreilly.com/.../9781492056348/'),
1238
('2-10', 'http://example.com/'),
1339
('10-2', 'http://example.com/'),
40+
('22', 'http://firstshortened.co')
1441
]
1542

43+
# straightforward mapping of .htaccess; some targets may be duplicated.
44+
SAMPLE_REDIRECTS = {
45+
'home': 'https://www.fluentpython.com/',
46+
'1-20': 'https://www.fluentpython.com/',
47+
'2-10': 'http://example.com/',
48+
'10-2': 'http://example.com/',
49+
'book': 'https://www.oreilly.com/.../9781492056348/',
50+
'ora': 'https://www.oreilly.com/.../9781492056348/',
51+
'22': 'http://firstshortened.co',
52+
}
53+
54+
# the value must be the shortest path for that target in the .htaccess
55+
SAMPLE_TARGETS = {
56+
'https://www.fluentpython.com/': 'home',
57+
'https://www.oreilly.com/.../9781492056348/': 'ora',
58+
'http://example.com/': '2-10',
59+
'http://firstshortened.co': '22',
60+
}
61+
1662

17-
def test_parse_htaccess(shared_datadir):
18-
with open(shared_datadir / 'sample.htaccess') as fp:
19-
text = fp.read()
20-
res = list(parse_htaccess(text))
63+
64+
def test_parse_htaccess():
    """Parsing the in-memory sample yields the expected (path, target) pairs."""
    parsed = list(parse_htaccess(SAMPLE_HTACCESS))
    assert parsed == PARSED_SAMPLE_HTACCESS
2267

2368

@@ -38,26 +83,43 @@ def test_choose(a, b, expected):
3883

3984

4085
def test_load_redirects():
    """The first dict returned by `load_redirects` matches SAMPLE_REDIRECTS."""
    by_path, _ignored = load_redirects(PARSED_SAMPLE_HTACCESS)
    assert by_path == SAMPLE_REDIRECTS
5188

5289

5390
def test_load_redirect_targets():
    """The second dict returned by `load_redirects` matches SAMPLE_TARGETS."""
    _ignored, by_target = load_redirects(PARSED_SAMPLE_HTACCESS)
    assert by_target == SAMPLE_TARGETS
93+
94+
95+
@mark.parametrize(
    'target,path,new',
    [
        ('https://www.fluentpython.com/', 'home', False),
        ('https://new.site/', '23', True),
    ],
)
def test_shorten(target, path, new):
    """Check the result of `shorten_one` and its effect on both dicts.

    Works on copies of the sample dicts so the module-level fixtures
    stay untouched between parametrized cases.
    """
    redirects = SAMPLE_REDIRECTS.copy()
    targets = SAMPLE_TARGETS.copy()
    path_gen = gen_unused_short(redirects)

    result = shorten_one(target, path_gen, redirects, targets)

    assert result == ShortenResult(target, path, new)
    added_paths = redirects.keys() - SAMPLE_REDIRECTS.keys()

    if not new:
        # Known target: nothing may have been added or changed.
        assert not added_paths
        assert redirects == SAMPLE_REDIRECTS
        assert targets == SAMPLE_TARGETS
        return

    # New target: exactly one path was added, and it is the expected one.
    assert added_paths == {path}
    assert redirects == {**SAMPLE_REDIRECTS, path: target}
    assert targets == {**SAMPLE_TARGETS, target: path}
119+
120+
121+
def test_update_htaccess():
    """Placeholder test: no assertions yet, always passes."""
61123

62124

63125
def test_gen_short():

0 commit comments

Comments
 (0)