|
38 | 38 | Match = _namedtuple('Match', 'a b size') |
39 | 39 |
|
40 | 40 |
|
41 | | -class _LCSUBDict: |
42 | | - """Dict method for finding longest common substring. |
| 41 | +class _LCSUBSimple: |
| 42 | + """Simple dict method for finding longest common substring. |
43 | 43 |
|
44 | 44 | Complexity: |
45 | 45 | T: O(n1 + n2) best, O(n1 × n2) worst |
@@ -481,18 +481,18 @@ def __chain_b(self): |
481 | 481 |
|
482 | 482 | self._max_bcount = max(bcounts.values()) if bcounts else 0 |
483 | 483 | self._all_junk = frozenset(junk | popular) |
484 | | - self._lcsub_aut = None # _LCSUBAutomaton instance |
485 | | - self._lcsub_dict = None # _LCSUBDict instanct |
| 484 | + self._lcsub_automaton = None # _LCSUBAutomaton instance |
| 485 | + self._lcsub_simple = None # _LCSUBSimple instance |
486 | 486 |
|
487 | 487 | def _get_lcsub_calculator(self, automaton=False): |
488 | 488 | if automaton: |
489 | | - if self._lcsub_aut is None: |
490 | | - self._lcsub_aut = _LCSUBAutomaton(self.b, self._all_junk) |
491 | | - return self._lcsub_aut |
| 489 | + if self._lcsub_automaton is None: |
| 490 | + self._lcsub_automaton = _LCSUBAutomaton(self.b, self._all_junk) |
| 491 | + return self._lcsub_automaton |
492 | 492 | else: |
493 | | - if self._lcsub_dict is None: |
494 | | - self._lcsub_dict = _LCSUBDict(self.b, self._all_junk) |
495 | | - return self._lcsub_dict |
| 493 | + if self._lcsub_simple is None: |
| 494 | + self._lcsub_simple = _LCSUBSimple(self.b, self._all_junk) |
| 495 | + return self._lcsub_simple |
496 | 496 |
|
497 | 497 | @property |
498 | 498 | def b2j(self): |
@@ -574,7 +574,9 @@ def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None): |
574 | 574 | # For that specific set it gave selection accuracy of 95%. |
575 | 575 | # The weak spot here is cases with little or no element overlap at all. |
576 | 576 | # However, such a check would have more cost than benefit. |
577 | | - use_automaton = self._max_bcount * asize > bsize * 6 + asize * 2 |
| 577 | + automaton_cost = bsize * 6 + asize * 2 |
| 578 | + simple_cost = self._max_bcount * asize |
| 579 | + use_automaton = simple_cost > automaton_cost |
578 | 580 | calc = self._get_lcsub_calculator(use_automaton) |
579 | 581 | besti, bestj, bestsize = calc.find(a, alo, ahi, blo, bhi) |
580 | 582 |
|
|
0 commit comments