|
6 | 6 | """ |
7 | 7 |
|
8 | 8 |
|
9 | | -def longest_common_subsequence(x: str, y: str): |
| 9 | +def longest_common_subsequence_string(u: str, v: str) -> str: |
10 | 10 | """ |
11 | | - Finds the longest common subsequence between two strings. Also returns the |
12 | | - The subsequence found |
13 | | -
|
14 | | - Parameters |
15 | | - ---------- |
16 | | -
|
17 | | - x: str, one of the strings |
18 | | - y: str, the other string |
19 | | -
|
20 | | - Returns |
21 | | - ------- |
22 | | - L[m][n]: int, the length of the longest subsequence. Also equal to len(seq) |
23 | | - Seq: str, the subsequence found |
24 | | -
|
25 | | - >>> longest_common_subsequence("programming", "gaming") |
26 | | - (6, 'gaming') |
27 | | - >>> longest_common_subsequence("physics", "smartphone") |
28 | | - (2, 'ph') |
29 | | - >>> longest_common_subsequence("computer", "food") |
30 | | - (1, 'o') |
31 | | - >>> longest_common_subsequence("", "abc") # One string is empty |
32 | | - (0, '') |
33 | | - >>> longest_common_subsequence("abc", "") # Other string is empty |
34 | | - (0, '') |
35 | | - >>> longest_common_subsequence("", "") # Both strings are empty |
36 | | - (0, '') |
37 | | - >>> longest_common_subsequence("abc", "def") # No common subsequence |
38 | | - (0, '') |
39 | | - >>> longest_common_subsequence("abc", "abc") # Identical strings |
40 | | - (3, 'abc') |
41 | | - >>> longest_common_subsequence("a", "a") # Single character match |
42 | | - (1, 'a') |
43 | | - >>> longest_common_subsequence("a", "b") # Single character no match |
44 | | - (0, '') |
45 | | - >>> longest_common_subsequence("abcdef", "ace") # Interleaved subsequence |
46 | | - (3, 'ace') |
47 | | - >>> longest_common_subsequence("ABCD", "ACBD") # No repeated characters |
48 | | - (3, 'ABD') |
| 11 | + Return the longest common subsequence of two strings using |
| 12 | + dynamic programming reconstruction. |
| 13 | +
|
| 14 | + >>> longest_common_subsequence_string("AGGTAB", "GXTXAYB") |
| 15 | + 'GTAB' |
| 16 | + >>> longest_common_subsequence_string("abcde", "ace") |
| 17 | + 'ace' |
| 18 | + >>> longest_common_subsequence_string("abc", "abc") |
| 19 | + 'abc' |
| 20 | + >>> longest_common_subsequence_string("abc", "def") |
| 21 | + '' |
| 22 | + >>> longest_common_subsequence_string("", "abc") |
| 23 | + '' |
49 | 24 | """ |
50 | | - # find the length of strings |
51 | | - |
52 | | - assert x is not None |
53 | | - assert y is not None |
54 | | - |
55 | | - m = len(x) |
56 | | - n = len(y) |
| 25 | + m, n = len(u), len(v) |
57 | 26 |
|
58 | | - # declaring the array for storing the dp values |
| 27 | + # Build the DP table |
59 | 28 | dp = [[0] * (n + 1) for _ in range(m + 1)] |
60 | | - |
61 | 29 | for i in range(1, m + 1): |
62 | 30 | for j in range(1, n + 1): |
63 | | - match = 1 if x[i - 1] == y[j - 1] else 0 |
64 | | - |
65 | | - dp[i][j] = max(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1] + match) |
| 31 | + if u[i - 1] == v[j - 1]: |
| 32 | + dp[i][j] = dp[i - 1][j - 1] + 1 |
| 33 | + else: |
| 34 | + dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]) |
66 | 35 |
|
67 | | - seq = "" |
| 36 | + # Backtrack to reconstruct the actual subsequence |
| 37 | + lcs: list[str] = [] |
68 | 38 | i, j = m, n |
69 | 39 | while i > 0 and j > 0: |
70 | | - match = 1 if x[i - 1] == y[j - 1] else 0 |
71 | | - |
72 | | - if dp[i][j] == dp[i - 1][j - 1] + match: |
73 | | - if match == 1: |
74 | | - seq = x[i - 1] + seq |
| 40 | + if u[i - 1] == v[j - 1]: |
| 41 | + lcs.append(u[i - 1]) |
75 | 42 | i -= 1 |
76 | 43 | j -= 1 |
77 | | - elif dp[i][j] == dp[i - 1][j]: |
| 44 | + elif dp[i - 1][j] > dp[i][j - 1]: |
78 | 45 | i -= 1 |
79 | 46 | else: |
80 | 47 | j -= 1 |
81 | 48 |
|
82 | | - return dp[m][n], seq |
83 | | - |
| 49 | + return "".join(reversed(lcs)) |
84 | 50 |
|
85 | 51 | if __name__ == "__main__": |
86 | 52 | a = "AGGTAB" |
87 | 53 | b = "GXTXAYB" |
88 | | - expected_ln = 4 |
89 | 54 | expected_subseq = "GTAB" |
90 | 55 |
|
91 | | - ln, subseq = longest_common_subsequence(a, b) |
92 | | - print("len =", ln, ", sub-sequence =", subseq) |
93 | | - import doctest |
| 56 | + subseq = longest_common_subsequence_string(a, b) |
| 57 | + print("sub-sequence =", subseq) |
| 58 | + assert subseq == expected_subseq |
94 | 59 |
|
| 60 | + import doctest |
95 | 61 | doctest.testmod() |
0 commit comments