Skip to content
This repository was archived by the owner on Jun 10, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions chpronounce/chp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def _append_phrase(self, ph, res):
def _append_word(self, word, pos, res):
if word in self.dic[1]:
for dy, poses in self.dic[1][word]:

if pos in poses:
res.append(dy)
break
Expand Down Expand Up @@ -55,7 +56,7 @@ def get_duyin(self, sentence):
if len(word) == 1:
self._append_word(word, pos, res)
else:
if word in self.dic[len(word)]:
if len(word) in self.dic and word in self.dic[len(word)]:
self._append_phrase(word, res)
else:
sub = ""
Expand All @@ -64,13 +65,14 @@ def get_duyin(self, sentence):
self._break_down(sub, pos, res)
sub = ""
sub += c
if len(sub) > 1 and sub in self.dic[len(sub)]:
if len(word) in self.dic and len(sub) > 1 and sub in self.dic[len(sub)]:
self._append_phrase(sub, res)
sub = ""
self._break_down(sub, pos, res)

if any(c in sentence for c in "不一"):
for i in range(len(sentence)):
if any(sentence[i] == c for c in "不一") and i + 1 != len(sentence):
if sentence[i] in "不一" and i + 1 != len(sentence):
if res[i + 1][2] == 4:
py, zy, tone = res[i]
tone = 2
Expand Down
8 changes: 4 additions & 4 deletions chpronounce/postag
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ n 名词 名
t 时间词 副
s 处所词 副
f 方位词 副
m 数词 X
m 数词
q 量词 量
b 区别词 动
r 代词 名
Expand All @@ -22,12 +22,12 @@ j 简称 缀
h 前接成分 缀
k 后接成分 缀
g 语素 名
x 非语素字 X
w 标点符号 X
x 非语素字
w 标点符号
nr 人名 名
ns 地名 名
nt 机构名称 名
nx 外文字符 X
nx 外文字符
nz 其它专名 名
vd 副动词 动
vn 名动词 动
Expand Down
Binary file modified chpronounce/xdic.pkl
Binary file not shown.
16 changes: 8 additions & 8 deletions verify_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def main():
ignore_flag = True
break
if ignore_flag:
continue
to_pop.append((word_len, word, 0))

if not isinstance(lis, list):
lis = [lis]
Expand Down Expand Up @@ -80,7 +80,7 @@ def main():
print("t |", "\t ".join(map(str, tones)))
print("+++++++++++++++-----------------------------")

if len(cys) != word_len or len(tones) != word_len:
if len(cys) != word_len or len(tones) != word_len or len(pys) != word_len:
to_pop.append((word_len, word, iol))
logging.warning(f"{word}[{iol}] is invalid.")
continue
Expand All @@ -90,17 +90,16 @@ def main():
x = [y for y in x if y in CHEWINGS]
x = "".join(x)
if xx != x:

logging.warning(f"[{word}] {xx} ---> {x}")
cys[i] = x

ori = dic[word_len][word]
# if word_len == 1:
# pys, cys, tones = pys[0], cys[0], tones[0]
if word_len == 1:
pys, cys, tones = pys[0], cys[0], tones[0]
if isinstance(dic[word_len][word], list):
dic[word_len][word][iol] = ((pys, cys, tones), pos) if pos != '' else (pys, cys, tones)
dic[word_len][word][iol] = ((pys, cys, tones), pos) # if pos != '' else (pys, cys, tones)
else:
dic[word_len][word] = ((pys, cys, tones), pos) if pos != '' else (pys, cys, tones)
dic[word_len][word] = (pys, cys, tones) # if pos != '' else (pys, cys, tones)

if ori != dic[word_len][word]:
n_diff += 1
Expand All @@ -110,7 +109,8 @@ def main():
# input()

for word_len, word, iol in to_pop:
dic[word_len].pop(word)
if word in dic[word_len]:
dic[word_len].pop(word)

# summary
total = 0
Expand Down