-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathunicodecomplete_create_cache.py
More file actions
211 lines (200 loc) · 26.4 KB
/
unicodecomplete_create_cache.py
File metadata and controls
211 lines (200 loc) · 26.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""
This is hereby released completely and irrevocably into the Public Domain.
- Joshua Landau <joshua@landau.ws>
"""
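# Lots of data that shouldn't really be in a program...
# Skip ahead about 150 lines to the actual program.
#
# `extras` is a list of (character, extra name) pairs. The first string
# below holds one character per entry; the second holds the matching names
# separated by "|" (newlines and tabs are stripped before splitting).
# A character appears more than once when it has several extra names.
# The two sequences are paired purely by position with zip(), so they must
# be kept in step whenever either one is edited.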
extras = list(zip(
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\t\n\n\x0b\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1c\x1d\x1d\x1e\x1e\x1f\x1f!!#$''()*,-./@[\\]_{{|}}\x7f\x82\x83\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x9a\x9b\x9c\x9d\x9e\x9f£¥¦¦««¬\xad®¯²³¶···»¿Æ×ØßææøİʼnŋœƍƎƐƒƒƒƗƚƛƟƢƣƲƶƿǀǁǂǂǃǃǷɑɖɛɞɤɨɵɿʋʌʒʕʘʚʼˇ̷̸̨̣̲̳̀́́̂̄̅̆̆̇̈̈̈̉̌̒̓̔̾͜͡ͅͅʹ͵ͺ;ΓΙλςϐϑϖϰϱϲϵ϶ϽϾϿєіѼԕԗԡԣ՚՛՜՝՞՟։֊ּֽ֑֖֥֪֮֒֘֜֟֡֨׀אצװײـٗۀۀۡ\u070fࠜࠞࠡࠥࠦࠩ࠰࠱࠲࠳࠴࠵࠸࠹࠺࠻࠼࠽࠾ऀँंॅ्॒॑।॥ব্ৱਅ਼ਾਿੀੁੂੇੈੋੌଯଡ଼ଢ଼ୟஃ௳௴௵௶௷௸௹௺ఁం్ೞചഛഞടഠഡഢണതഥദധഴശഷഽ്്ංඃඅආඇඈඉඊඋඌඍඎඏඐඑඒඓඔඕඖකඛගඝඞඟචඡජඣඤඥඦටඨඩඪණඬතථදධනඳපඵබභමඹයරලවශෂසහළෆ්ාැෑිීුූෘෙේෛොෝෞෟෲෳฮฯืใไๅๆ็ํກຂຄງຈຊຍດຕຖທນບປຜຝຝພຟມຢຣຣລລວສຫອຮົຽໃໄ་།ཥཱཾཿ྄ྭྰྱྲྵ࿎࿐࿒࿓࿔࿕࿖࿗࿘့္်ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵᆨᆩᆪᆫᆬᆭᆮᆯᆰᆱᆲᆳᆴᆵᆶᆷᆸᆹᆺᆻᆼᆽᆾᆿᇀᇁᇂᇬᇭᇮᇯំំះះᬀᬁᬂᬃᬄᬅᬆᬇᬈᬉᬊᬋᬌᬍᬎᬏᬐᬑᬒᬔᬖᬙᬛᬝᬞᬟᬠᬡᬣᬥᬨᬪᬰᬱ᬴ᬵᬶᬷᬸᬹᬺᬻᬼᬽᬾᬿᭀᭁᭂᭃ᭄᭚᭛᭜᭝᭞᭟᭠ᮀᮁᮂᮡᮢᮣᮤᮥᮦᮧᮨᮩ᮪᳐᳑᳒᳓᳕᳖᳗᳘᳙᳜᳝᳞᳟᳚᳛᳠᳡᳢᳣᳤᳥᳦᳧᳨ᳩᳪᳫᳬ᳭ᳮᳯᳰᳱᳲ\u2001\u2002\u2003\u2004\u2005―‘’‚‛“”„‟†‡•…‰‱′″‴‹›※※‾‿⁀⁄⁒⁕⁙⁙⁝\u2063⃛⃜⃝⃝⃦⃫⃧⃩ℂ℃℄ℊℋℌℎℑℒℓℓ℔ℕ℗℗℘ℚℛℜℝ℞℞ℤ℧ℬℯℯℰℱℲℳℳℴℵℶℷℸ⅁ↃↃ→↔↞↟↠↠↡↡↣↦↧↯↴↸↹⇞⇟⇤⇥⇧⇪⇫⇬⇭⇮⇯⇰⇱⇲⇳⇸⇹⇻⇼∀∃∅∆∆∆∇∇∋∎∏∐∑∘∘√∢∣∣∧∨∩∪∸∼∼∼∼∼∼∽∾∿≈≒≗≙≜≜≬≺≻⊂⊃⊎⊕⊕⊖⊗⊗⊙⊙⊢⊢⊢⊣⊣⊤⊥⊦⊨⊨⊨⊨⋂⋃⋔⌈⌊⌐⌑⌘⌙⌤⌦⌧⌫⍊⍎⍑⍕⍡⍤⍥⍨⎅⎈⎉⎊⎋⎰⎱⑆⑈⑉─│┌┐└┘├┝┤┥┬┯┴┷┼┿█■□▮△▷▻▽◁◅◉◻☀☁☂☃☓☔☕☞☠☧☫☬☰☱☲☳☴☵☶☷☺☼♀♂♍♏♏♥♯⚒⚓⚔⚕⚖⚗⚘⚙⚛⚞⚟⚡⚢⚣⚤⚥⚥⚦⚧⚨⚩⚪⚪⚬⚰⚱⚿⛄⛅⛆⛇⛈⛌⛏⛐⛑⛒⛓⛛⛝⛞⛟⛟⛣⛨⛩⛭⛮⛱⛲⛳⛴⛵⛶⛷⛸⛹⛺⛼⛽⛾⛿✶✺❉❋❗❦❧⟂⟓⟔⟟⟡⟢⟣⟤⟥⟦⟧⟨⟨⟩⟩⟪⟫⟮⟯⟻⟼⤀⤁⤅⤆⤇⤔⤕⤖⤖⤗⤘⧖⧖⧜⧠⧦⧴⧹⨇⨛⨜⨝⨤⨦⨧⩁⩨⪡⫝̸⫝⫢⫫⫱⫴⫾⫿⭕⭖⭗⭘⭙ⵀⵓⵘⵤⵯ⸖⸘⸮〚〛〮〯しじちぢっつづふシジチヂッツヅフㅤㆀ㆐㆑㉈㉉㉊㉋㉌㉍㉎㉏㊀㍿ꀕ꠆꣠꣡꣢꣣꣤꣥꣦꣧꣨꣩꣪꣫꣬꣭꣮꣯꣰꣱ꣲꣳꣴꣵꣶꣷ꣸꣺ꣻꥇꥈꥉꥊꥌꥎꥏꥐꥑꦀꦁꦂꦃꦉꦊꦋꦐꦑꦓꦖꦘꦙꦜꦞꦟꦡꦣꦦꦨꦯꦰ꦳ꦴꦵꦶꦷꦸꦹꦺꦻꦼꦽꦾꦿ꧀꧈꧉ꫛꫜꯀꯁꯂꯃꯆꯇꯈꯉꯊꯋꯌꯍꯎꯐꯑꯒꯓꯔꯖꯗꯘꯙꯚꯛꯜꯝꯞꯟꯠꯡꯢꯣꯤꯥꯦꯧꯨꯩꯪ꯫꯬꯭꯰꯱꯲꯳꯴꯵꯶꯷꯸꯹︘\ufeff𐀎𐀘𐀛𐀥𐀭𐁂𐁉𐁒𐂓𐂔𐂘𐂙𐂤𐃉𐃋𐨿𐩠𐩡𐩢𐩣𐩤𐩥𐩦𐩧𐩨𐩩𐩫𐩬𐩭𐩮𐩰𐩱𐩲𐩳𐩴𐩵𐩶𐩷𐩸𐩹𐩺𐩻𐩼𝃅𝄺𝄻𝄼𝄽𝈂𝈅𝈆𝈈𝈍𝈎𝈑𝈓𝈜𝌀𝌁𝌂𝌃𝌄𝌅𝒫𝔐𝔖𝚤𝚥🀄🀅🀆🀇🀐🀙🀢🀣🀤🀥🀪🄫🄬🄱🄱🄽🄿🅂🅆🅊🅋🅌🅍🅎🅗🅟🅹🅻🅼🅿🆊🆋🆌🆍🆐🈀🈐🈑🈒🈓🈔🈔🈕🈖🈗🈘🈙🈚🈛🈜🈝🈞🈟🈠🈡🈢🈣🈤🈥🈦🈧🈨🈩🈪🈫🈬🈭🈮🈯🈰🈱🉀🉁🉂🉃🉄🉅🉆🉇🉈",
"""NULL|START OF HEADING|START OF TEXT|END OF TEXT|END OF TRANSMISSION|ENQUIRY|ACKNOWLEDGE|BELL|BACKSPACE|CHARACTER TABULATION|HORIZONTAL TABULATION (HT), TAB|
LINE FEED (LF)|NEW LINE (NL), END OF LINE (EOL)|LINE TABULATION|VERTICAL TABULATION (VT)|FORM FEED (FF)|CARRIAGE RETURN (CR)|SHIFT OUT|
SHIFT IN|DATA LINK ESCAPE|DEVICE CONTROL ONE|DEVICE CONTROL TWO|DEVICE CONTROL THREE|DEVICE CONTROL FOUR|NEGATIVE ACKNOWLEDGE|SYNCHRONOUS IDLE|
END OF TRANSMISSION BLOCK|CANCEL|END OF MEDIUM|SUBSTITUTE|ESCAPE|INFORMATION SEPARATOR FOUR|FILE SEPARATOR (FS)|INFORMATION SEPARATOR THREE|
GROUP SEPARATOR (GS)|INFORMATION SEPARATOR TWO|RECORD SEPARATOR (RS)|INFORMATION SEPARATOR ONE|UNIT SEPARATOR (US)|FACTORIAL|BANG|
POUND SIGN, HASH, CROSSHATCH, OCTOTHORPE|MILREIS, ESCUDO|APOSTROPHE-QUOTE|APL QUOTE|OPENING PARENTHESIS|CLOSING PARENTHESIS|STAR|
DECIMAL SEPARATOR|HYPHEN OR MINUS SIGN|PERIOD, DOT, DECIMAL POINT|SLASH, VIRGULE|AT SIGN|OPENING SQUARE BRACKET|BACKSLASH|CLOSING SQUARE BRACKET|
SPACING UNDERSCORE|OPENING CURLY BRACKET|LEFT BRACE|VERTICAL BAR|CLOSING CURLY BRACKET|RIGHT BRACE|DELETE|BREAK PERMITTED HERE|NO BREAK HERE|
NEXT LINE (NEL)|START OF SELECTED AREA|END OF SELECTED AREA|CHARACTER TABULATION SET|CHARACTER TABULATION WITH JUSTIFICATION|
LINE TABULATION SET|PARTIAL LINE FORWARD|PARTIAL LINE BACKWARD|REVERSE LINE FEED|SINGLE SHIFT TWO|SINGLE SHIFT THREE|DEVICE CONTROL STRING|
PRIVATE USE ONE|PRIVATE USE TWO|SET TRANSMIT STATE|CANCEL CHARACTER|MESSAGE WAITING|START OF GUARDED AREA|END OF GUARDED AREA|START OF STRING|
SINGLE CHARACTER INTRODUCER|CONTROL SEQUENCE INTRODUCER|STRING TERMINATOR|OPERATING SYSTEM COMMAND|PRIVACY MESSAGE|APPLICATION PROGRAM COMMAND|
POUND STERLING, IRISH PUNT, ITALIAN LIRA, TURKISH LIRA, ETC.|YUAN SIGN|BROKEN VERTICAL BAR|PARTED RULE (TYPOGRAPHY)|LEFT GUILLEMET|
CHEVRONS (TYPOGRAPHY)|ANGLED DASH (TYPOGRAPHY)|DISCRETIONARY HYPHEN|REGISTERED TRADE MARK SIGN|OVERLINE, APL OVERBAR|SQUARED|CUBED|
PARAGRAPH SIGN|MIDPOINT (TYPOGRAPHY)|GEORGIAN COMMA|GREEK MIDDLE DOT (ANO TELEIA)|RIGHT GUILLEMET|TURNED QUESTION MARK|LATIN CAPITAL LIGATURE AE|
Z NOTATION CARTESIAN PRODUCT|O SLASH|ESZETT|LATIN SMALL LIGATURE AE|ASH|O SLASH|I DOT|LATIN SMALL LETTER APOSTROPHE N|ENGMA, ANGMA|
ETHEL|REVERSED POLISH-HOOK O|TURNED E|EPSILON|SCRIPT F|FLORIN CURRENCY SYMBOL (NETHERLANDS)|FUNCTION SYMBOL|BARRED I, I BAR|BARRED L|
BARRED LAMBDA, LAMBDA BAR|BARRED O, O BAR|LATIN CAPITAL LETTER GHA|LATIN SMALL LETTER GHA|SCRIPT V|BARRED Z, Z BAR|WEN|PIPE|DOUBLE PIPE|
DOUBLE-BARRED PIPE|PALATOALVEOLAR CLICK (IPA)|LATIN LETTER EXCLAMATION MARK|(POST)ALVEOLAR CLICK (IPA)|WEN|LATIN SMALL LETTER SCRIPT A|
D RETROFLEX HOOK|EPSILON|CLOSED REVERSED EPSILON|LATIN SMALL LETTER BABY GAMMA|BARRED I, I BAR|O BAR|LONG LEG TURNED IOTA|LATIN SMALL LETTER SCRIPT V|
CARET, WEDGE|DRAM|REVERSED GLOTTAL STOP|BULLSEYE|CLOSED EPSILON|APOSTROPHE|HACEK|GREEK VARIA|STRESS MARK|GREEK OXIA, TONOS|HAT|LONG|
OVERSCORE, VINCULUM|SHORT|GREEK VRACHY|DERIVATIVE (NEWTONIAN NOTATION)|DOUBLE DOT ABOVE, UMLAUT|GREEK DIALYTIKA|DOUBLE DERIVATIVE|
HOI|HACEK, V ABOVE|CEDILLA ABOVE|GREEK PSILI, SMOOTH BREATHING MARK|GREEK DASIA, ROUGH BREATHING MARK|NANG|NASAL HOOK|UNDERLINE, UNDERSCORE|
DOUBLE UNDERLINE, DOUBLE UNDERSCORE|SHORT SLASH OVERLAY|LONG SLASH OVERLAY|YERIK|GREEK NON-SPACING IOTA BELOW|IOTA SUBSCRIPT|LIGATURE TIE BELOW, PAPYROLOGICAL HYPHEN|
LIGATURE TIE|DEXIA KERAIA|ARISTERI KERAIA|IOTA SUBSCRIPT|EROTIMATIKO|GAMMA FUNCTION|IOTA ADSCRIPT|LAMBDA|STIGMA|CURLED BETA|SCRIPT THETA|
OMEGA PI|SCRIPT KAPPA|TAILED RHO|GREEK SMALL LETTER LUNATE SIGMA|STRAIGHT EPSILON|REVERSED STRAIGHT EPSILON|ANTISIGMA|SIGMA PERIESTIGMENON|
ANTISIGMA PERIESTIGMENON|OLD CYRILLIC YEST|OLD CYRILLIC I|CYRILLIC "BEAUTIFUL OMEGA"|VOICELESS L|VOICELESS R|PALATALIZED L|PALATALIZED N|
ARMENIAN MODIFIER LETTER RIGHT HALF RING|SHESHT|BATSAGANCHAKAN NSHAN|BOWT|HARTSAKAN NSHAN|PATIW|VERTSAKET|YENTAMNA|ATNAH|SEGOLTA|TARHA, ME'AYLA ~ MAYLA|
TSINORIT, ZINORIT; TSINOR, ZINOR|TERES|PAZER GADOL|PAZER QATAN|YORED|AZLA|GALGAL|TSINOR; ZARQA|SHURUQ|SILUQ|LEGARMEH|ALEPH|ZADE|TSVEY VOVN|
TSVEY YUDN|KASHIDA|ULTA PESH|ARABIC LETTER HAMZAH ON HA|IZAFET|ARABIC JAZM|SAM|FATHA AL-NIDA|FATHA AL-IMA|FATHA AL-IHA|FATHA|DAMMA|KASRA|
WORD SEPARATOR|INTERRUPTION|RESTRAINT|PRAYER|SURPRISE|QUESTION|SHOUTING|END OF SECTION|OUTBURST|TEACHING|SUBMISSIVENESS|FULL STOP|REST|
VAIDIKA ADHOMUKHA CANDRABINDU|ANUNASIKA|BINDU|CANDRA|HALANT|VEDIC TONE SVARITA|VEDIC TONE ANUDATTA|PURNA VIRAM|DEERGH VIRAM|BENGALI VA, WA|
HASANT|BENGALI LETTER VA WITH LOWER DIAGONAL|AIRA|PAIRIN BINDI|KANNA|SIHARI|BIHARI|AUNKAR|DULAINKAR|LANVAN|DULANVAN|HORA|KANAURA|JA|DDA|
DDHA|YA|AYTHAM|NAAL|MAATHAM|VARUDAM|PATRU|VARAVU|MERPADI|RUPAI|ENN|ARASUNNA|SUNNA|HALANT|KANNADA LETTER LLLA|CHA|CHHA|NHA|TA|TTA|HARD DA|HARD DDA|
HARD NA|THA|TTHA|SOFT DA|SOFT DDA|ZHA|SOFT SHA|SHA|PRASLESHAM|CHANDRAKKALA|VOWEL HALF-U|ANUSVARA|VISARGA|SINHALA LETTER A|SINHALA LETTER AA|
SINHALA LETTER AE|SINHALA LETTER AAE|SINHALA LETTER I|SINHALA LETTER II|SINHALA LETTER U|SINHALA LETTER UU|SINHALA LETTER VOCALIC R|
SINHALA LETTER VOCALIC RR|SINHALA LETTER VOCALIC L|SINHALA LETTER VOCALIC LL|SINHALA LETTER E|SINHALA LETTER EE|SINHALA LETTER AI|
SINHALA LETTER O|SINHALA LETTER OO|SINHALA LETTER AU|SINHALA LETTER KA|SINHALA LETTER KHA|SINHALA LETTER GA|SINHALA LETTER GHA|
SINHALA LETTER NGA|SINHALA LETTER NNGA|SINHALA LETTER CA|SINHALA LETTER CHA|SINHALA LETTER JA|SINHALA LETTER JHA|SINHALA LETTER NYA|
SINHALA LETTER JNYA|SINHALA LETTER NYJA|SINHALA LETTER TTA|SINHALA LETTER TTHA|SINHALA LETTER DDA|SINHALA LETTER DDHA|SINHALA LETTER NNA|
SINHALA LETTER NNDDA|SINHALA LETTER TA|SINHALA LETTER THA|SINHALA LETTER DA|SINHALA LETTER DHA|SINHALA LETTER NA|SINHALA LETTER NDA|
SINHALA LETTER PA|SINHALA LETTER PHA|SINHALA LETTER BA|SINHALA LETTER BHA|SINHALA LETTER MA|SINHALA LETTER MBA|SINHALA LETTER YA|
SINHALA LETTER RA|SINHALA LETTER LA|SINHALA LETTER VA|SINHALA LETTER SHA|SINHALA LETTER SSA|SINHALA LETTER SA|SINHALA LETTER HA|
SINHALA LETTER LLA|SINHALA LETTER FA|VIRAMA|SINHALA VOWEL SIGN AA|SINHALA VOWEL SIGN AE|SINHALA VOWEL SIGN AAE|SINHALA VOWEL SIGN I|
SINHALA VOWEL SIGN II|SINHALA VOWEL SIGN U|SINHALA VOWEL SIGN UU|SINHALA VOWEL SIGN VOCALIC R|SINHALA VOWEL SIGN E|SINHALA VOWEL SIGN EE|
SINHALA VOWEL SIGN AI|SINHALA VOWEL SIGN O|SINHALA VOWEL SIGN OO|SINHALA VOWEL SIGN AU|SINHALA VOWEL SIGN VOCALIC L|SINHALA VOWEL SIGN VOCALIC RR|
SINHALA VOWEL SIGN VOCALIC LL|HO NOK HUK|PAIYAN NOI|SARA UUE|SARA AI MAI MUAN|SARA AI MAI MALAI|LAKKHANG YAO|MAI YAMOK|MAI TAIKHU|
NIKKHAHIT|KO KAY|KHO KHAY|KHO KHUAY|NGO NGU, NGO NGUA|CO COK, CO CUA|SO SANG|NYO NYUNG|DO DEK|TO TA|THO THONG|THO THUNG|NO NOK|BO BE, BO BET|
PO PA|PHO PHENG|LAO LETTER FO FON|FO FA|PHO PHU|LAO LETTER FO FAY|MO MEW, MO MA|YO YA|LAO LETTER RO|RO ROT|LAO LETTER LO|LO LING|WO WI|
SO SYA|HO HAY, HO HAN|O O|HO HYA, HO HYAN|MAI KONG|NYO FYANG|MAI MUAN|MAI MAY|TSEK|SHEY|REVERSED SHA|A-CHUNG|ANUSVARA|VISARGA|SROG MED|
WA-ZUR, WA-BTAGS (WA TA)|A-CHUNG|YA-BTAGS (YA TA)|RA-BTAGS (RA TA)|REVERSED SUBJOINED SHA|DENA DEKA|TIBETAN MARK BKA- SHOG GI MGO RGYAN|
NYI TSEK|DA NYING YIK GO DUN MA|DA NYING YIK GO KAB MA|GYUNG DRUNG NANG -KHOR|GYUNG DRUNG PHYI -KHOR|GYUNG DRUNG NANG -KHOR BZHI MIG CAN|
GYUNG DRUNG PHYI -KHOR BZHI MIG CAN|AUKMYIT|KILLER|KILLER|G|GG|N|D|DD|R|M|B|BB|S|SS|J|JJ|C|K|T|P|H|A|AE|YA|YAE|EO|E|YEO|YE|O|WA|WAE|OE|YO|U|WEO|WE|WI|YU|EU|YI|I|G|GG|
GS|N|NJ|NH|D|L|LG|LM|LB|LS|LT|LP|LH|M|B|BS|S|SS|NG|J|C|K|T|P|H|YESIEUNG-KIYEOK|YESIEUNG-SSANGKIYEOK|SSANGYESIEUNG|YESIEUNG-KHIEUKH|SRAK AM|ANUSVARA|SRAK AH|
VISARGA|ARDHACANDRA|CANDRABINDU|ANUSVARA|REPHA|VISARGA|A|AA|I|II|U|UU|VOCALIC R|VOCALIC RR|VOCALIC L|VOCALIC LL|E|AI|O|AU|KHA|GHA|CHA|JHA|TTA|TTHA|
DDA|DDHA|NNA|THA|DHA|PHA|BHA|SHA|SSA|NUKTA|AA|I|II|U|UU|VOCALIC R|VOCALIC RR|VOCALIC L|VOCALIC LL|E|AI|O|AU|AE|OE|VIRAMA|SECTION|HONORIFIC SECTION|PUNCTUATION RING|
COLON|DANDA|DOUBLE DANDA|LINE-BREAKING HYPHEN|ANUSVARA|REPHA|VISARGA|SUBJOINED YA|SUBJOINED RA|SUBJOINED LA|I|U|AE|O|E|EU|VIRAMA|VAIDIKA SAAMASVARA KARSHANNA|
VAIDIKA SVARITA UURDHVA SHARA|VAIDIKA SAAMASVARA PRENKHA|VAIDIKA SAAMAGAANA YOGAKAALA|VAIDIKA SVARITA ADHO NYUBJA|VAIDIKA SVARITA ADHAH KONNA|
VAIDIKA SVARITA ADHO VAKRA REKHAA|VAIDIKA SVARITA ADHO'RDHA VAKRA|VAIDIKA SVARITA ADHAH SAMYUKTA REKHAA|VAIDIKA SVARITA UURDHVA DVI REKHAA|
VAIDIKA SVARITA UURDHVA TRI REKHAA|VAIDIKA SVARITA ADHO REKHAA|VAIDIKA SVARITA ADHO BINDU|VAIDIKA SVARITA ADHO DVI BINDU|VAIDIKA SVARITA ADHAS TRI BINDU|
VAIDIKA UURDHVA VAKRA REKHAA|VAIDIKA SVARITA DVI VAKRA KHANNDA|VAIDIKA MADHYAREKHAA|VAIDIKA VISARGA DAKSHINNATAH UURDHVAGA|VAIDIKA VISARGA VAAMATAH UURDHVAGA|
VAIDIKA VISARGA VAAMATAH ADHOGA|VAIDIKA VISARGA DAKSHINNATAH ADHOGA|VAIDIKA VISARGA DAKSHINNATAH UURDHVA VAKRA|VAIDIKA VISARGA VAAMATAH ADHO VAKRA|
VAIDIKA ANUSVAARA ANTARMUKHA|VAIDIKA ANUSVAARA NAAGAPHANNA|VAIDIKA ANUSVAARA VAAMAGOMUKHA|VAIDIKA ANUSVAARA VAAMAGOMUKHA SA-VAKRA|
VAIDIKA TIRYAK|VAIDIKA ANUSVAARA ANUGAAMII|VAIDIKA ANUSVAARA DAKSHINNAMUKHA|VAIDIKA ANUSVAARA TTHA-SADRISHA|VAIDIKA ANUSVAARA UBHAYATO MUKHA|
VAIDIKA JIHVAAMUULIIYA UPADHMAANIIYA|MUTTON QUAD|NUT|MUTTON|THICK SPACE|MID SPACE|QUOTATION DASH|SINGLE TURNED COMMA QUOTATION MARK|
SINGLE COMMA QUOTATION MARK|LOW SINGLE COMMA QUOTATION MARK|SINGLE REVERSED COMMA QUOTATION MARK|DOUBLE TURNED COMMA QUOTATION MARK|
DOUBLE COMMA QUOTATION MARK|LOW DOUBLE COMMA QUOTATION MARK|DOUBLE REVERSED COMMA QUOTATION MARK|OBELISK, OBELUS, LONG CROSS|
DIESIS, DOUBLE OBELISK|BLACK SMALL CIRCLE|THREE DOT LEADER|PERMILLE, PER THOUSAND|PERMYRIAD|MINUTES, FEET|SECONDS, INCHES|LINES (OLD MEASURE, 1/12 OF AN INCH)|
LEFT POINTING SINGLE GUILLEMET|RIGHT POINTING SINGLE GUILLEMET|JAPANESE KOME|URDU PARAGRAPH SEPARATOR|SPACING OVERSCORE|GREEK ENOTIKON|
Z NOTATION SEQUENCE CONCATENATION|SOLIDUS (TYPOGRAPHY)|ABZÜGLICH (GERMAN), MED AVDRAG AV (SWEDISH), PISKA (SWEDISH, "WHIP")|
PHUL, PUSPIKA|GREEK PENTONKION|QUINCUNX|EPIDAUREAN ACROPHONIC SYMBOL THREE|INVISIBLE COMMA|THIRD DERIVATIVE|FOURTH DERIVATIVE|JIS COMPOSITION CIRCLE|
CYRILLIC COMBINING TEN THOUSANDS SIGN|Z NOTATION FINITE FUNCTION DIACRITIC|ACTUARIAL BEND|CONTRACTION OPERATOR|LONG DOUBLE SLASH OVERLAY|
THE SET OF COMPLEX NUMBERS|DEGREES CENTIGRADE|CLONE|REAL NUMBER SYMBOL|HAMILTONIAN OPERATOR|HILBERT SPACE|HEIGHT, SPECIFIC ENTHALPY, ...|
IMAGINARY PART|LAPLACE TRANSFORM|MATHEMATICAL SYMBOL 'ELL'|LITER (TRADITIONAL SYMBOL)|POUNDS|NATURAL NUMBER|PUBLISHED|PHONORECORD SIGN|
WEIERSTRASS ELLIPTIC FUNCTION|THE SET OF RATIONAL NUMBERS|RIEMANN INTEGRAL|REAL PART|THE SET OF REAL NUMBERS|RECIPE|CROSS RATIO|
THE SET OF INTEGERS|MHO|BERNOULLI FUNCTION|ERROR|NATURAL EXPONENT|EMF (ELECTROMOTIVE FORCE)|FOURIER TRANSFORM|CLAUDIAN DIGAMMA INVERSUM|
M-MATRIX (PHYSICS)|GERMAN MARK CURRENCY SYMBOL, BEFORE WWII|ORDER, OF INFERIOR ORDER TO|FIRST TRANSFINITE CARDINAL (COUNTABLE)|
SECOND TRANSFINITE CARDINAL (THE CONTINUUM)|THIRD TRANSFINITE CARDINAL (FUNCTIONS OF A REAL VARIABLE)|FOURTH TRANSFINITE CARDINAL|
GAME|APOSTROPHIC C|CLAUDIAN ANTISIGMA|Z NOTATION TOTAL FUNCTION|Z NOTATION RELATION|FAST CURSOR LEFT|FAST CURSOR UP|Z NOTATION TOTAL SURJECTION|
FAST CURSOR RIGHT|FORM FEED|FAST CURSOR DOWN|Z NOTATION TOTAL INJECTION|Z NOTATION MAPLET|DEPTH SYMBOL|ELECTROLYSIS|LINE FEED|HOME|
TAB WITH SHIFT TAB|PAGE UP|PAGE DOWN|LEFTWARD TAB|RIGHTWARD TAB|SHIFT|CAPS LOCK|LEVEL 2 LOCK|CAPS LOCK|NUMERICS LOCK|LEVEL 3 SELECT|
LEVEL 3 LOCK|GROUP LOCK|HOME|END|SCROLLING|Z NOTATION PARTIAL FUNCTION|Z NOTATION PARTIAL RELATION|Z NOTATION FINITE FUNCTION|Z NOTATION FINITE RELATION|
UNIVERSAL QUANTIFIER|EXISTENTIAL QUANTIFIER|NULL SET|LAPLACE OPERATOR|FORWARD DIFFERENCE|SYMMETRIC DIFFERENCE (IN SET THEORY)|
BACKWARD DIFFERENCE|GRADIENT, DEL|SUCH THAT|Q.E.D.|PRODUCT SIGN|COPRODUCT SIGN|SUMMATION SIGN|COMPOSITE FUNCTION|APL JOT|RADICAL SIGN|
ANGLE ARC|SUCH THAT|APL STILE|WEDGE, CONJUNCTION|VEE, DISJUNCTION|CAP, HAT|CUP|SATURATING SUBTRACTION|VARIES WITH (PROPORTIONAL TO)|
DIFFERENCE BETWEEN|SIMILAR TO|NOT|CYCLE|APL TILDE|LAZY S|MOST POSITIVE|ALTERNATING CURRENT|ASYMPTOTIC TO|NEARLY EQUALS|APPROXIMATELY EQUAL TO|
CORRESPONDS TO|EQUIANGULAR|EQUAL TO BY DEFINITION|PLAINTIFF, QUANTIC|LOWER RANK THAN|HIGHER RANK THAN|INCLUDED IN SET|INCLUDES IN SET|
Z NOTATION BAG ADDITION|DIRECT SUM|VECTOR POINTING INTO PAGE|SYMMETRIC DIFFERENCE|TENSOR PRODUCT|VECTOR POINTING INTO PAGE|DIRECT PRODUCT|
VECTOR POINTING OUT OF PAGE|TURNSTILE|PROVES, IMPLIES, YIELDS|REDUCIBLE|REVERSE TURNSTILE|NON-THEOREM, DOES NOT YIELD|TOP|BASE, BOTTOM|
REDUCES TO|STATEMENT IS TRUE, VALID|IS A TAUTOLOGY|SATISFIES|RESULTS IN|Z NOTATION GENERALISED INTERSECTION|Z NOTATION GENERALISED UNION|
PROPER INTERSECTION|APL UPSTILE|APL DOWNSTILE|BEGINNING OF LINE|KISSEN (PILLOW)|COMMAND KEY|LINE MARKER|ENTER KEY|DELETE TO THE RIGHT KEY|
CLEAR KEY|DELETE TO THE LEFT KEY|UP TACK UNDERBAR|UP TACK JOT|DOWN TACK OVERBAR|DOWN TACK JOT|DOWN TACK DIAERESIS|HOOT|HOLLER|SMIRK|
CENTER|CONTROL|PAUSE|INTERRUPT, BREAK|ESCAPE|LEFT MOUSTACHE|RIGHT MOUSTACHE|TRANSIT|ON US|DASH|VIDEOTEX MOSAIC DG 15|VIDEOTEX MOSAIC DG 14|
VIDEOTEX MOSAIC DG 16|VIDEOTEX MOSAIC DG 17|VIDEOTEX MOSAIC DG 18|VIDEOTEX MOSAIC DG 19|VIDEOTEX MOSAIC DG 20|VIDEOTEX MOSAIC DG 03|
VIDEOTEX MOSAIC DG 21|VIDEOTEX MOSAIC DG 04|VIDEOTEX MOSAIC DG 22|VIDEOTEX MOSAIC DG 02|VIDEOTEX MOSAIC DG 23|VIDEOTEX MOSAIC DG 01|
VIDEOTEX MOSAIC DG 24|VIDEOTEX MOSAIC DG 13|SOLID|MODING MARK (IN IDEOGRAPHIC TEXT)|QUADRATURE|HISTOGRAM MARKER|TRINE|Z NOTATION RANGE RESTRICTION|
FORWARD ARROW INDICATOR|HAMILTON OPERATOR|Z NOTATION DOMAIN RESTRICTION|BACKWARD ARROW INDICATOR|TAINOME (JAPANESE, A KIND OF BULLET)|
ALWAYS (MODAL OPERATOR)|CLEAR WEATHER|CLOUDY WEATHER|RAINY WEATHER|SNOWY WEATHER|ST. ANDREW'S CROSS|SHOWERY WEATHER|TEA OR COFFEE, DEPENDING ON LOCALE|
FIST (TYPOGRAPHIC TERM)|POISON|CONSTANTINE'S CROSS, CHRISTOGRAM|SYMBOL OF IRAN|GURMUKHI KHANDA|QIAN2|DUI4|LI2|ZHEN4|XUN4|KAN3|GEN4|KUN1|
HAVE A NICE DAY!|COMPASS|VENUS|MARS|MINIM (ALTERNATE GLYPH)|SCORPIO|MINIM, DROP|VALENTINE|Z NOTATION INFIX BAG COUNT|MINING, WORKING DAY (IN TIMETABLES)|
NAUTICAL TERM, HARBOR (MAPS)|MILITARY TERM, BATTLEGROUND (MAPS), KILLED IN ACTION|MEDICAL TERM|LEGAL TERM, JURISPRUDENCE|CHEMICAL TERM, CHEMISTRY|
BOTANICAL TERM|TECHNOLOGY, TOOLS|NUCLEAR INSTALLATION (MAPS)|SOMEONE SPEAKING|BACKGROUND SPEAKING|THUNDER|LESBIANISM|MALE HOMOSEXUALITY|
BISEXUALITY|TRANSGENDERED SEXUALITY|HERMAPHRODITE (ENTOMOLOGY)|TRANSGENDERED SEXUALITY|TRANSGENDERED SEXUALITY|FERROUS IRON SULPHATE (ALCHEMY AND OLDER CHEMISTRY)|
MAGNESIUM (ALCHEMY AND OLDER CHEMISTRY)|ASEXUALITY, SEXLESS, GENDERLESS|ENGAGED, BETROTHED|ENGAGED, BETROTHED (GENEALOGY)|BURIED (GENEALOGY)|
CREMATED (GENEALOGY)|PARENTAL LOCK|LIGHT SNOW|PARTLY CLOUDY|RAINY WEATHER|HEAVY SNOW|THUNDERSTORM|ACCIDENT|UNDER CONSTRUCTION|ICY ROAD|
MAINTENANCE|ROAD CLOSED|TYRE CHAINS REQUIRED|DRIVE SLOW|CLOSED ENTRY|CLOSED TO LARGE VEHICLES|BLACK LORRY|CLOSED TO LARGE VEHICLES, ALTERNATE|
PUBLIC OFFICE|HOSPITAL|TORII|FACTORY|POWER PLANT, POWER SUBSTATION|BATHING BEACH|PARK|GOLF COURSE|FERRY BOAT TERMINAL|MARINA OR YACHT HARBOUR|
INTERSECTION|SKI RESORT|ICE SKATING RINK|TRACK AND FIELD, GYMNASIUM|CAMPING SITE|GRAVEYARD, MEMORIAL PARK, CEMETERY|PETROL STATION, GAS STATION|
DRIVE-IN RESTAURANT|JAPANESE SELF-DEFENCE FORCE SITE|SEXTILE|STARBURST|JACK|TURBOFAN|OBSTACLES ON THE ROAD, ARIB STD B24|ALDUS LEAF|
HEDERA, IVY LEAF|ORTHOGONAL TO|PULLBACK|PUSHOUT|RADIAL COMPONENT|NEVER (MODAL OPERATOR)|WAS NEVER (MODAL OPERATOR)|WILL NEVER BE (MODAL OPERATOR)|
WAS ALWAYS (MODAL OPERATOR)|WILL ALWAYS BE (MODAL OPERATOR)|Z NOTATION LEFT BAG BRACKET|Z NOTATION RIGHT BAG BRACKET|BRA|Z NOTATION LEFT SEQUENCE BRACKET|
KET|Z NOTATION RIGHT SEQUENCE BRACKET|Z NOTATION LEFT CHEVRON BRACKET|Z NOTATION RIGHT CHEVRON BRACKET|LGROUP|RGROUP|MAPS FROM|
MAPS TO|Z NOTATION PARTIAL SURJECTION|Z NOTATION FINITE SURJECTION|MAPS TO|MAPS FROM|MAPS TO|Z NOTATION PARTIAL INJECTION|Z NOTATION FINITE INJECTION|
BIJECTIVE MAPPING|Z NOTATION BIJECTION|Z NOTATION SURJECTIVE INJECTION|Z NOTATION FINITE SURJECTIVE INJECTION|VERTICAL BOWTIE|
WHITE FRAMUS|ISOTECH ENTITY &IINFIN;|D'ALEMBERTIAN|TAUTOLOGICAL EQUIVALENT|COLON RIGHT ARROW|Z NOTATION SCHEMA HIDING|MERGE|UPPER INTEGRAL|
LOWER INTEGRAL|LARGE BOWTIE|POSITIVE DIFFERENCE OR SUM|SUM OR POSITIVE DIFFERENCE|NIM-ADDITION|Z NOTATION BAG SUBTRACTION|IDENTICAL AND PARALLEL TO|
ABSOLUTE CONTINUITY|NOT INDEPENDENT|INDEPENDENT|ORDINARILY SATISFIES|INDEPENDENCE|NECESSARILY SATISFIES|INTERLEAVE|DIJKSTRA CHOICE|
N-ARY DIJKSTRA CHOICE|BASIC SYMBOL FOR SPEED LIMIT|PREFECTURAL OFFICE|MUNICIPAL OFFICE|TOWN OR VILLAGE OFFICE|POLICE STATION|TUAREG YAB|
TUAREG YAW|ADRAR YAJ|HARPOON YAZ|TAMATART|DIPLE PERIESTIGMENE|GNABORRETNI|PUNCTUS PERCONTATIVUS|LEFT ABSTRACT SYNTAX BRACKET|RIGHT ABSTRACT SYNTAX BRACKET|
SINGLE DOT BANGJEOM|DOUBLE DOT BANGJEOM|SHI|JI (NOT UNIQUE)|CHI|JI (NOT UNIQUE)|SMALL TSU|TSU|ZU (NOT UNIQUE)|FU|SHI|JI (NOT UNIQUE)|
CHI|JI (NOT UNIQUE)|SMALL TSU|TSU|ZU (NOT UNIQUE)|FU|CHAE UM|SSANGYESIEUNG|TATETEN|KAERITEN RE|SPEED LIMIT 10 KM/H|SPEED LIMIT 20 KM/H|
SPEED LIMIT 30 KM/H|SPEED LIMIT 40 KM/H|SPEED LIMIT 50 KM/H|SPEED LIMIT 60 KM/H|SPEED LIMIT 70 KM/H|SPEED LIMIT 80 KM/H|MARU-ITI, SYMBOL OF UNIFICATION|
KABUSIKI-GAISYA|YI SYLLABLE ITERATION MARK|HALANT, VIRAMA|VAIDIKA SAAMASVARA ANKA SHUUNYA|VAIDIKA SAAMASVARA ANKA EKA UDAATTA|
VAIDIKA SAAMASVARA ANKA DVI SVARITA|VAIDIKA SAAMASVARA ANKA TRI ANUDAATTA|VAIDIKA SAAMASVARA ANKA CHATUR|VAIDIKA SAAMASVARA ANKA PANCHA|
VAIDIKA SAAMASVARA ANKA SHATT|VAIDIKA SAAMASVARA ANKA SAPTA|VAIDIKA SAAMASVARA ANKA ASHTA|VAIDIKA SAAMASVARA ANKA NAVA|VAIDIKA SAAMASVARA ABHINIHITA|
VAIDIKA SAAMASVARA U|VAIDIKA SAAMASVARA KA|VAIDIKA SAAMASVARA NAMANA|VAIDIKA SAAMASVARA PRANNATAM|VAIDIKA SAAMASVARA RA|VAIDIKA SAAMASVARA VINATA|
VAIDIKA SAAMASVARA DIIRGHIIBHAAVA|VAIDIKA CANDRABINDU|VAIDIKA ANUSVAARA CANDRABINDU TIRYAK|VAIDIKA ANUSVAARA DVI CANDRABINDU TIRYAK|
VAIDIKA ANUSVAARA CANDRABINDU SA-DVI|VAIDIKA ANUSVAARA CANDRABINDU SA-TRI|VAIDIKA ANUSVAARA CANDRABINDU SA-AVAGRAHA|VAIDIKA PUSHPIKAA|
VAIDIKA TRUTIKAA|SHIROREKHAA|KALUAN|KAMITAN|KAMICA|KATILING|KATULUNG|KAJINA|KATULANG|DUO DEATAS|KAJUNJUNG|ARDHACANDRA|ANUSVARA|REPHA|VISARGA|
VOCALIC R|VOCALIC L|VOCALIC LL|QA|KHA|GHA|CHA|JNYA|JHA|TTHA|DDHA|NNA|THA|DHA|PHA|BHA|SHA|SSA|NUKTA|AA|O|I|II|U|UU|E|AI|AE|VOCALIC R|MEDIAL YA|MEDIAL RA|
VIRAMA|DANDA|DOUBLE DANDA|PERSON|ONE|KA|SA|LA|MA|CA|TA|KHA|NGA|THA|WA|YA|HA|U|PHA|A|GA|JHA|RA|JA|DA|GHA|DHA|BHA|K|L|M|P|N|T|NG|I|O|I|AA|E|OU|U|EI|ANUSVARA|DOUBLE DANDA|
HEAVY TONE|KILLER|PHUN|AMA|ANI|AHUM|MARI|MANGA|TARUK|TARET|NIPAL|MAPAL|PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET|
BYTE ORDER MARK (BOM), ZWNBSP|IDEOGRAM B129 FLOUR|IDEOGRAM B109 OX|IDEOGRAM B030 FIGS|IDEOGRAM B106 SHEEP|IDEOGRAM B031 FLAX|IDEOGRAM B108 PIG|
IDEOGRAM B144 SAFFRON|IDEOGRAM B107 GOAT|FRUIT|SAFFRON|OINTMENT|HONEY|CHEESE|PUGIO|"GUPIO", INVERTED SWORD|HALANT|HEH|LAM|HAH|MEEM|QAF|
WAW|SHEEN|REH|BEH|TEH|KAF|NOON|KHAH|SAD|FEH|ALEF|AIN|DAD|JEEM|DAL|GHAIN|TAH|ZAIN|THAL|YEH|THEH|ZAH|BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS|
DOUBLE WHOLE-REST, BREVE REST|SEMIBREVE REST|MINIM REST|CROCHET REST|GREEK INSTRUMENTAL NOTATION SYMBOL-3|GREEK INSTRUMENTAL NOTATION SYMBOL-21|
GREEK INSTRUMENTAL NOTATION SYMBOL-9|GREEK INSTRUMENTAL NOTATION SYMBOL-44|GREEK INSTRUMENTAL NOTATION SYMBOL-41|GREEK INSTRUMENTAL NOTATION SYMBOL-35|
GREEK INSTRUMENTAL NOTATION SYMBOL-15|GREEK INSTRUMENTAL NOTATION SYMBOL-28|GREEK INSTRUMENTAL NOTATION SYMBOL-20|REN|TIAN REN|
DI REN|REN TIAN|REN DI|REN REN|POWER SET|NEW TESTAMENT MAJORITY TEXT|SEPTUAGINT, GREEK OLD TESTAMENT|\IMATH|\JMATH|HONGZHONG|QINGFA|
BAIBAN|WAN|TIAO|BING|MEI|LAN|ZHU|JU|BAIDA|COMPACT DISC, SINGLE DISC RECORD|DISC RECORD|BANK|B-MODE STEREO COMPRESSION BROADCASTING SERVICE (ARIB STD B24)|
NEWS (ARIB STD B24)|PROGRESSIVE BROADCASTING (ARIB STD B24)|STEREO BROADCASTING SERVICE (ARIB STD B24)|WIDE-FORMAT 16:9 BROADCASTING SERVICE (ARIB STD B24)|
HDTV|MULTI-VIEW TELEVISION|SDTV|SURROUND STEREO BROADCASTING SERVICE|PAY-PER-VIEW|HOTEL (ARIB STD B24)|PARKING SPACE (ARIB STD B24)|
JUNCTION (ARIB STD B24)|LEISURE CENTER (ARIB STD B24)|MUSEUM, CULTURAL CENTER (ARIB STD B24)|PARKING SPACE EMPTY-FULL (ARIB STD B24)|
PARKING SPACE CLOSED|INTERCHANGE, RAMP|PARKING AREA|SERVICE AREA|DISC JOCKEY|AND OTHERS|BROADCASTING SERVICE WITH SIGN LANGUAGE INTERPRETATION|
CLOSE-CAPTIONED BROADCASTING|TWO-WAY BROADCASTING SERVICE|DATA BROADCASTING SERVICE LINKED WITH A MAIN PROGRAM|BILINGUAL BROADCASTING SERVICE|
SECOND BASE|SOUND-MULTIPLEX BROADCASTING SERVICE|COMMENTARY BROADCASTING|WEATHER FORECAST|TRAFFIC INFORMATION|DRAMA FILM|FREE BROADCASTING SERVICE|
PAY BROADCASTING SERVICE|THE FIRST PART|THE LATTER PART|REBROADCAST|NEW SERIES OF PROGRAMS|FIRST RELEASED PROGRAM|THE LAST EPISODE|
LIVE BROADCAST|MAIL-ORDER|VOICE ACTORS|DUBBED VERSION|PERFORMED BY|PITCHER|CATCHER|FIRST BASE|THIRD BASE|SHORT STOP|LEFT FIELD|CENTER FIELD|
RIGHT FIELD|DESIGNATED HITTER|RUNNER|BATTER|HOME RUN|TRIPLE|DOUBLE|SAFE|RUN|HIT|STEAL|WIN|LOSS""".replace("\n", "").replace("\t", "").split("|")
))
#########################################################################################################
import gzip
import pickle
from collections import defaultdict
from os import makedirs
from os.path import expanduser, join
from sys import maxunicode
from unicodedata import name as unicodename
def _two_letter_prefixes(name):
    """Yield the two-letter prefix of every indexable word in *name*.

    Prefixes are yielded in word order and may repeat; callers add them to
    sets, so duplicates are harmless.  Words shorter than two characters are
    skipped, and any name containing "CJK UNIFIED IDEOGRAPH" yields nothing
    at all, which keeps those names out of the cache.
    """
    # The exclusion depends on the whole name, so test it once up front
    # instead of once per word.
    if "CJK UNIFIED IDEOGRAPH" in name:
        return
    for word in name.split():
        if len(word) > 1:
            yield word[:2]


def main():
    """Build the prefix-indexed Unicode name cache and write it to disk.

    Two indexes are built, both keyed by the first two letters of each word
    that occurs in a name:

    * official names: prefix -> set of names reported by ``unicodedata``;
    * extra names: prefix -> set of ``(character, name)`` pairs taken from
      the module-level ``extras`` table.

    The pair ``(official_index, extras_index)`` is pickled (protocol 3) as
    plain dicts into the gzip file ``~/.sublime/unicode cache.pickle.gz``;
    the directory is created if it does not exist.
    """
    # Go through all of the possible places for a Unicode character to
    # exist (no guarantee this will take any particular amount of time).
    unicode_names = defaultdict(set)
    for codepoint in range(maxunicode + 1):
        try:
            name = unicodename(chr(codepoint))
        except ValueError:
            # unicodedata has no name for this code point; nothing to index.
            continue

        # We only want to trigger for names that contain a word whose first
        # two letters are the first two letters of the "search". This is
        # for speed, based on real timings.
        #
        # People with SSDs and faster computers could choose more lenient
        # searching.
        for prefix in _two_letter_prefixes(name):
            unicode_names[prefix].add(name)

    # Same indexing scheme for the extra names, but the character is stored
    # alongside the name because it cannot be looked up from the name.
    unicode_extras = defaultdict(set)
    for character, name in extras:
        for prefix in _two_letter_prefixes(name):
            unicode_extras[prefix].add((character, name))

    # Save to a gzipped pickle file. The defaultdicts are converted to plain
    # dicts so the pickle holds ordinary dict objects.
    cache_dir = join(expanduser("~"), ".sublime")
    makedirs(cache_dir, exist_ok=True)
    cache_path = join(cache_dir, "unicode cache.pickle.gz")
    with gzip.GzipFile(cache_path, 'wb') as output:
        pickle.dump((dict(unicode_names), dict(unicode_extras)), output, 3)
# Only build and write the cache when run as a script, not when imported.
if __name__ == "__main__":
    main()