Skip to content

Commit e5a5124

Browse files
committed
minor minor
1 parent 49b69dd commit e5a5124

File tree

1 file changed

+68
-74
lines changed

1 file changed

+68
-74
lines changed

Lib/difflib.py

Lines changed: 68 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939

4040

4141
def _adjust_indices(seq, start, stop):
42-
assert start >= 0
42+
if start < 0:
43+
raise ValueError('Starting index can not be negative')
4344
size = len(seq)
4445
if stop is None or stop > size:
4546
stop = size
@@ -52,9 +53,10 @@ class _LCSUBSimple:
5253
Complexity:
5354
T: O(n1 + n2) best, O(n1 × n2) worst
5455
S: O(n2)
56+
, where n1 = len(a), n2 = len(b)
5557
5658
Members:
57-
b2j for x in b, b2j[x] is a list of the indices (into b)
59+
_b2j for x in b, _b2j[x] is a list of the indices (into b)
5860
at which x appears; junk elements do not appear
5961
"""
6062

@@ -73,17 +75,18 @@ def isbuilt(self, blo, bhi):
7375

7476
def _get_b2j(self):
7577
b2j = self._b2j
76-
if b2j is None:
77-
b2j = {} # positions of each element in b
78-
for i, elt in enumerate(self.b):
79-
indices = b2j.setdefault(elt, [])
80-
indices.append(i)
81-
junk = self.junk
82-
if junk:
83-
for elt in junk:
84-
del b2j[elt]
85-
self._b2j = b2j
78+
if b2j is not None:
79+
return b2j
8680

81+
b2j = {} # positions of each element in b
82+
for i, elt in enumerate(self.b):
83+
indices = b2j.setdefault(elt, [])
84+
indices.append(i)
85+
junk = self.junk
86+
if junk:
87+
for elt in junk:
88+
del b2j[elt]
89+
self._b2j = b2j
8790
return b2j
8891

8992
def find(self, a, alo=0, ahi=None, blo=0, bhi=None):
@@ -120,18 +123,13 @@ def find(self, a, alo=0, ahi=None, blo=0, bhi=None):
120123
return besti, bestj, bestsize
121124

122125

123-
_LENGTH = 0
124-
_LINK = 1
125-
_NEXT = 2
126-
_POS = 3
127-
128-
129126
class _LCSUBAutomaton:
130127
"""Suffix Automaton for finding longest common substring.
131128
132129
Complexity:
133130
T: O(n1 + n2) - roughly 2 * n1 + 6 * n2
134131
S: O(n2) - maximum nodes: 2 * n2 + 1
132+
, where n1 = len(a), n2 = len(b)
135133
136134
Node spec:
137135
node: list = [length: int, link: list, next: dict, end_pos: int]
@@ -157,62 +155,58 @@ def isbuilt(self, blo, bhi):
157155

158156
def _get_root(self, blo, bhi):
159157
"""
160-
Automaton needs to rebuild for every (blo, bhi)
161-
This is made to cache the last one and only rebuild on new values
162-
163-
Note that to construct Automaton that can be queried for any
164-
(blo, bhi), each node would need to store a set of
165-
indices. And this is prone to O(n^2) memory explosion.
166-
Current approach maintains reasonable memory guarantees
167-
and is also much simpler in comparison.
158+
Automaton needs to be rebuilt for every (blo, bhi)
159+
The last one is cached and is only rebuilt for a new range
168160
"""
169161
key = (blo, bhi)
170162
root = self._root
171-
if root is None or self._cache != key:
172-
root = [0, None, {}, -1]
173-
b = self.b
174-
junk = self.junk
175-
last_len = 0
176-
last = root
177-
for j in range(blo, bhi):
178-
c = b[j]
179-
if c in junk:
180-
last_len = 0
181-
last = root
163+
if root is not None and self._cache == key:
164+
return root
165+
166+
LEN, LINK, NEXT, EPOS = 0, 1, 2, 3
167+
root = [0, None, {}, -1]
168+
b = self.b
169+
junk = self.junk
170+
last_len = 0
171+
last = root
172+
for j in range(blo, bhi):
173+
c = b[j]
174+
if c in junk:
175+
last_len = 0
176+
last = root
177+
else:
178+
last_len += 1
179+
curr = [last_len, None, {}, j]
180+
181+
p = last
182+
p_next = p[NEXT]
183+
while c not in p_next:
184+
p_next[c] = curr
185+
if p is root:
186+
curr[LINK] = root
187+
break
188+
p = p[LINK]
189+
p_next = p[NEXT]
182190
else:
183-
last_len += 1
184-
curr = [last_len, None, {}, j]
185-
186-
p = last
187-
p_next = p[_NEXT]
188-
while c not in p_next:
189-
p_next[c] = curr
190-
if p is root:
191-
curr[_LINK] = root
192-
break
193-
p = p[_LINK]
194-
p_next = p[_NEXT]
191+
q = p_next[c]
192+
p_len_p1 = p[LEN] + 1
193+
if p_len_p1 == q[LEN]:
194+
curr[LINK] = q
195195
else:
196-
q = p_next[c]
197-
p_length_p1 = p[_LENGTH] + 1
198-
if p_length_p1 == q[_LENGTH]:
199-
curr[_LINK] = q
200-
else:
201-
# Copy `q[_POS]` to ensure leftmost match in b
202-
clone = [p_length_p1, q[_LINK], q[_NEXT].copy(), q[_POS]]
203-
while (p_next := p[_NEXT]).get(c) is q:
204-
p_next[c] = clone
205-
if p is root:
206-
break
207-
p = p[_LINK]
208-
209-
q[_LINK] = curr[_LINK] = clone
210-
211-
last = curr
212-
213-
self._root = root
214-
self._cache = key
196+
# Copy `q[EPOS]` to ensure leftmost match in b
197+
clone = [p_len_p1, q[LINK], q[NEXT].copy(), q[EPOS]]
198+
while (p_next := p[NEXT]).get(c) is q:
199+
p_next[c] = clone
200+
if p is root:
201+
break
202+
p = p[LINK]
203+
204+
q[LINK] = curr[LINK] = clone
205+
206+
last = curr
215207

208+
self._root = root
209+
self._cache = key
216210
return root
217211

218212
def find(self, a, alo=0, ahi=None, blo=0, bhi=None):
@@ -221,6 +215,7 @@ def find(self, a, alo=0, ahi=None, blo=0, bhi=None):
221215
if alo >= ahi or blo >= bhi:
222216
return (alo, blo, 0)
223217

218+
LEN, LINK, NEXT, EPOS = 0, 1, 2, 3
224219
root = self._get_root(blo, bhi)
225220
junk = self.junk
226221
v = root
@@ -235,11 +230,11 @@ def find(self, a, alo=0, ahi=None, blo=0, bhi=None):
235230
v = root
236231
l = 0
237232
else:
238-
while v is not root and c not in v[_NEXT]:
239-
v = v[_LINK]
240-
l = v[_LENGTH]
233+
while v is not root and c not in v[NEXT]:
234+
v = v[LINK]
235+
l = v[LEN]
241236

242-
v_next = v[_NEXT]
237+
v_next = v[NEXT]
243238
if c in v_next:
244239
v = v_next[c]
245240
l += 1
@@ -252,8 +247,7 @@ def find(self, a, alo=0, ahi=None, blo=0, bhi=None):
252247
return (alo, blo, 0)
253248

254249
start_in_s1 = best_pos + 1 - best_len
255-
end_in_s2 = best_state[_POS]
256-
start_in_s2 = end_in_s2 + 1 - best_len
250+
start_in_s2 = best_state[EPOS] + 1 - best_len
257251
return (start_in_s1, start_in_s2, best_len)
258252

259253

0 commit comments

Comments
 (0)