Skip to content

Commit 49f52e2

Browse files
committed
updated chapter 4 and appendix-A files
1 parent a1d6c12 commit 49f52e2

25 files changed

+5219
-10
lines changed

04-text-byte/charfinder/cf.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import unicodedata
4+
5+
FIRST = ord(' ')  # <1>
LAST = sys.maxunicode


def find(*query_words, first=FIRST, last=LAST):  # <2>
    """Print every named character in ``first..last`` matching all query words.

    A character matches when each of ``query_words`` (compared upper-cased)
    is one of the whitespace-separated words of its Unicode name.  Each
    match is printed as ``U+XXXX<TAB>char<TAB>NAME``; a final ``(N found)``
    line reports how many matches were printed.
    """
    wanted = {word.upper() for word in query_words}  # <3>
    matches = 0
    for codepoint in range(first, last + 1):
        symbol = chr(codepoint)  # <4>
        label = unicodedata.name(symbol, None)  # <5>
        if not label or not wanted.issubset(label.split()):  # <6>
            continue  # unnamed code point, or at least one word is missing
        print(f'U+{codepoint:04X}\t{symbol}\t{label}')  # <7>
        matches += 1
    print(f'({matches} found)')
18+
19+
20+
def main(words):
    """Run ``find`` on ``words``, or print a usage hint when none are given."""
    if not words:
        print('Please provide words to find.')
        return
    find(*words)


if __name__ == '__main__':
    # Command-line entry point: every argument is a query word.
    main(sys.argv[1:])
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
Doctests for ``cf.py``
2+
======================
3+
4+
How to run the tests
5+
----------------------
6+
7+
Run the ``doctest`` module from the command line::
8+
9+
$ python3 -m doctest cf_tests.rst
10+
11+
12+
Tests
13+
-----
14+
15+
Import functions for testing::
16+
17+
>>> from cf import find, main
18+
19+
Test ``find`` with single result::
20+
21+
>>> find("sign", "registered") # doctest:+NORMALIZE_WHITESPACE
22+
U+00AE ® REGISTERED SIGN
23+
(1 found)
24+
25+
26+
Test ``find`` with two results::
27+
28+
>>> find("chess", "queen", last=0xFFFF) # doctest:+NORMALIZE_WHITESPACE
29+
U+2655 ♕ WHITE CHESS QUEEN
30+
U+265B ♛ BLACK CHESS QUEEN
31+
(2 found)
32+
33+
Test ``main`` with no words::
34+
35+
>>> main([])
36+
Please provide words to find.

04-text-byte/charfinder/test.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
# Run the doctests in cf_tests.rst.  All extra command-line arguments
# (for example -v) are forwarded to doctest; "$@" expands to nothing when
# no argument is given and keeps each argument intact when several are.
python3 -m doctest cf_tests.rst "$@"

04-text-byte/numerics_demo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# BEGIN NUMERICS_DEMO
1+
# tag::NUMERICS_DEMO[]
22
import unicodedata
33
import re
44

@@ -15,4 +15,4 @@
1515
format(unicodedata.numeric(char), '5.2f'), # <6>
1616
unicodedata.name(char), # <7>
1717
sep='\t')
18-
# END NUMERICS_DEMO
18+
# end::NUMERICS_DEMO[]

04-text-byte/ramanujan.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# BEGIN RE_DEMO
1+
# tag::RE_DEMO[]
22
import re
33

44
re_numbers_str = re.compile(r'\d+') # <1>
@@ -18,4 +18,4 @@
1818
print('Words')
1919
print(' str :', re_words_str.findall(text_str)) # <8>
2020
print(' bytes:', re_words_bytes.findall(text_bytes)) # <9>
21-
# END RE_DEMO
21+
# end::RE_DEMO[]

04-text-byte/sanitize.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
2929
"""
3030

31-
# BEGIN SHAVE_MARKS
31+
# tag::SHAVE_MARKS[]
3232
import unicodedata
3333
import string
3434

@@ -39,9 +39,9 @@ def shave_marks(txt):
3939
shaved = ''.join(c for c in norm_txt
4040
if not unicodedata.combining(c)) # <2>
4141
return unicodedata.normalize('NFC', shaved) # <3>
42-
# END SHAVE_MARKS
42+
# end::SHAVE_MARKS[]
4343

44-
# BEGIN SHAVE_MARKS_LATIN
44+
# tag::SHAVE_MARKS_LATIN[]
4545
def shave_marks_latin(txt):
4646
"""Remove all diacritic marks from Latin base characters"""
4747
norm_txt = unicodedata.normalize('NFD', txt) # <1>
@@ -56,9 +56,9 @@ def shave_marks_latin(txt):
5656
latin_base = c in string.ascii_letters
5757
shaved = ''.join(keepers)
5858
return unicodedata.normalize('NFC', shaved) # <5>
59-
# END SHAVE_MARKS_LATIN
59+
# end::SHAVE_MARKS_LATIN[]
6060

61-
# BEGIN ASCIIZE
61+
# tag::ASCIIZE[]
6262
single_map = str.maketrans("""‚ƒ„†ˆ‹‘’“”•–—˜›""", # <1>
6363
"""'f"*^<''""---~>""")
6464

@@ -84,4 +84,4 @@ def asciize(txt):
8484
no_marks = shave_marks_latin(dewinize(txt)) # <5>
8585
no_marks = no_marks.replace('ß', 'ss') # <6>
8686
return unicodedata.normalize('NFKC', no_marks) # <7>
87-
# END ASCIIZE
87+
# end::ASCIIZE[]

04-text-byte/skin.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from unicodedata import name
2+
3+
SKIN1 = 0x1F3FB  # EMOJI MODIFIER FITZPATRICK TYPE-1-2  # <1>
SKINS = list(map(chr, range(SKIN1, SKIN1 + 5)))  # <2>
THUMB = '\N{THUMBS UP SIGN}'  # U+1F44D 👍

# The plain thumbs-up comes first, then one variant per skin-tone modifier.
examples = [THUMB]  # <3>
examples += [THUMB + skin for skin in SKINS]  # <4>

for example in examples:
    print(example, end='\t')  # <5>
    # Names of the code points that make up the example, joined by ' + '.
    print(*map(name, example), sep=' + ')  # <6>

04-text-byte/two_flags.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# REGIONAL INDICATOR SYMBOLS
RIS_A = '\N{REGIONAL INDICATOR SYMBOL LETTER A}'  # U+1F1E6, LETTER A
RIS_U = '\N{REGIONAL INDICATOR SYMBOL LETTER U}'  # U+1F1FA, LETTER U
print(RIS_A, RIS_U, sep='')  # AU: Australia
print(RIS_U, RIS_A, sep='')  # UA: Ukraine
print(RIS_A, RIS_A, sep='')  # AA: no such country

04-text-byte/zwj_sample.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from unicodedata import name
2+
3+
# Each line: space-separated hex code points | description | emoji version.
zwj_sample = """
1F468 200D 1F9B0            |man: red hair                      |E11.0
1F9D1 200D 1F91D 200D 1F9D1 |people holding hands               |E12.0
1F3CA 1F3FF 200D 2640 FE0F  |woman swimming: dark skin tone     |E4.0
1F469 1F3FE 200D 2708 FE0F  |woman pilot: medium-dark skin tone |E4.0
1F468 200D 1F469 200D 1F467 |family: man, woman, girl           |E2.0
1F3F3 FE0F 200D 26A7 FE0F   |transgender flag                   |E13.0
1F469 200D 2764 FE0F 200D 1F48B 200D 1F469 |kiss: woman, woman  |E2.0
"""
zwg_sample = zwj_sample  # the original, misspelled name still resolves

# Invisible code points are shown by a short label instead of their name.
markers = {'\u200D': 'ZWJ',  # ZERO WIDTH JOINER
           '\uFE0F': 'V16',  # VARIATION SELECTOR-16
           }

for line in zwj_sample.strip().split('\n'):
    code, descr, version = (s.strip() for s in line.split('|'))
    chars = [chr(int(c, 16)) for c in code.split()]
    # Header: the rendered sequence, its emoji version and description.
    print(''.join(chars), version, descr, sep='\t', end='')
    for char in chars:
        if char in markers:
            # A marker is appended to the line of the character before it.
            print(' + ' + markers[char], end='')
        else:
            # Every visible component starts its own indented line.
            ucode = f'U+{ord(char):04X}'
            print(f'\n\t{char}\t{ucode}\t{name(char)}', end='')
    print()

appendix-A/arcfour.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""RC4 compatible algorithm"""
2+
3+
def arcfour(key, in_bytes, loops=20):
    """Encrypt or decrypt ``in_bytes`` with an RC4-compatible stream cipher.

    Each input byte is XORed with a keystream byte, so calling the function
    again with the same ``key`` and ``loops`` restores the original data.

    Args:
        key: sequence of byte values (e.g. ``bytes``), at most 256 long.
            It is repeated as needed to fill the 256-byte key box; an
            empty key leaves the key box zero-filled.
        in_bytes: iterable of byte values to transform.
        loops: number of passes of the S-box mixing loop.  ``1`` gives the
            classic RC4 key schedule; the default of 20 follows the
            CipherSaber-2 recommendation.

    Returns:
        A new ``bytearray`` with one output byte per input byte.

    Raises:
        ValueError: if ``key`` is longer than 256 bytes.
    """
    key_len = len(key)
    if key_len > 256:
        # Fail with a clear message rather than an IndexError from the
        # key-box copy below.
        raise ValueError('key must be at most 256 bytes, got %d' % key_len)

    kbox = bytearray(256)  # create key box
    for i, byte in enumerate(key):  # copy the key into the start of the box
        kbox[i] = byte
    for i in range(key_len, 256):  # repeat the key until the box is full
        kbox[i] = kbox[i - key_len]

    # [1] initialize sbox
    sbox = bytearray(range(256))

    # repeat sbox mixing loop, as recommended in CipherSaber-2
    # http://ciphersaber.gurus.com/faq.html#cs2
    # (j deliberately carries over from one pass to the next)
    j = 0
    for _ in range(loops):
        for i in range(256):
            j = (j + sbox[i] + kbox[i]) % 256
            sbox[i], sbox[j] = sbox[j], sbox[i]

    # main loop
    i = 0
    j = 0
    out_bytes = bytearray()

    for byte in in_bytes:
        i = (i + 1) % 256
        # [2] shuffle sbox
        j = (j + sbox[i]) % 256
        sbox[i], sbox[j] = sbox[j], sbox[i]
        # [3] compute t, the index of the next keystream byte
        t = (sbox[i] + sbox[j]) % 256
        out_bytes.append(byte ^ sbox[t])

    return out_bytes
40+
41+
42+
def test():
    """Round-trip 1,000,000 bytes through ``arcfour`` and report timings.

    Prints the elapsed time after encrypting and again after decrypting
    (both measured from the same starting point), then ``OK``.

    Raises:
        AssertionError: if decrypting the ciphertext does not reproduce
            the original data.
    """
    # perf_counter is monotonic and high-resolution, so the measurement
    # cannot be skewed by system clock adjustments.
    from time import perf_counter
    clear = bytearray(b'1234567890' * 100000)
    t0 = perf_counter()
    cipher = arcfour(b'key', clear)
    print('elapsed time: %.2fs' % (perf_counter() - t0))
    result = arcfour(b'key', cipher)
    # Keep the message short: formatting both 1 MB buffers with %r would
    # bury the failure in megabytes of output.
    assert result == clear, 'round trip did not restore the original data'
    print('elapsed time: %.2fs' % (perf_counter() - t0))
    print('OK')


if __name__ == '__main__':
    test()

0 commit comments

Comments
 (0)