11"""
2- Author : Turfa Auliarachman
3- Date : October 12, 2016
4-
52This is a pure Python implementation of Dynamic Programming solution to the edit
6- distance problem.
7-
8- The problem is :
9- Given two strings A and B. Find the minimum number of operations to string B such that
10- A = B. The permitted operations are removal, insertion, and substitution.
3+ distance problem (also known as Levenshtein distance).
4+
5+ The Problem:
6+ Given two strings A and B, find the minimum number of operations to transform
7+ string A into string B. The permitted operations are:
8+ 1. Insertion - Add a character
9+ 2. Deletion - Remove a character
10+ 3. Substitution - Replace one character with another
11+
12+ Time Complexity: O(m × n) where m and n are the lengths of the two strings
13+ Space Complexity: O(m × n) for the DP table
14+
15+ Applications:
16+ - Spell checkers and autocorrect
17+ - DNA sequence alignment in bioinformatics
18+ - Plagiarism detection
19+ - Speech recognition
20+ - Fuzzy string matching
21+
22+ Example:
23+ >>> EditDistance().min_dist_bottom_up("intention", "execution")
24+ 5
25+ >>> # The 5 edits are: intention -> inention -> enention -> exention -> exection -> execution
1126"""
1227
1328
class EditDistance:
    """
    Compute the edit (Levenshtein) distance between two strings.

    Two dynamic-programming solvers are provided and return the same value:
    a top-down one (memoized recursion) and a bottom-up one (tabulation).

    Attributes:
        word1: Source string of the most recent call.
        word2: Target string of the most recent call.
        dp: Table filled by the most recent call. The top-down solver uses a
            len(word1) x len(word2) table in which -1 means "not computed
            yet"; the bottom-up solver uses a (len(word1) + 1) x
            (len(word2) + 1) table.

    Example:
        >>> solver = EditDistance()
        >>> solver.min_dist_bottom_up("kitten", "sitting")
        3
    """

    def __init__(self) -> None:
        """Initialize the solver with empty strings and an empty table."""
        self.word1: str = ""
        self.word2: str = ""
        self.dp: list[list[int]] = []

    def __min_dist_top_down_dp(self, m: int, n: int) -> int:
        """
        Return the edit distance between word1[:m + 1] and word2[:n + 1].

        Args:
            m: Index of the last considered character of word1 (-1 = empty).
            n: Index of the last considered character of word2 (-1 = empty).

        Returns:
            The minimum number of insertions, deletions and substitutions.

        The recursion lowers m and/or n by one per call, so its depth is at
        most len(word1) + len(word2); very long inputs can therefore reach
        Python's recursion limit.
        """
        # NOTE(review): the lines between the two base-case tests and the
        # delete/replace calls were elided in the reviewed diff; they are
        # reconstructed from the documented contract and the -1 sentinel used
        # by min_dist_top_down -- confirm against the repository.
        if m == -1:
            # word1 prefix is empty: insert the n + 1 characters of word2.
            return n + 1
        if n == -1:
            # word2 prefix is empty: delete the m + 1 characters of word1.
            return m + 1
        if self.dp[m][n] > -1:
            # Already solved; reuse the memoized value.
            return self.dp[m][n]

        if self.word1[m] == self.word2[n]:
            # Matching last characters cost nothing; move diagonally.
            self.dp[m][n] = self.__min_dist_top_down_dp(m - 1, n - 1)
        else:
            insert = self.__min_dist_top_down_dp(m, n - 1)
            delete = self.__min_dist_top_down_dp(m - 1, n)
            replace = self.__min_dist_top_down_dp(m - 1, n - 1)
            self.dp[m][n] = 1 + min(insert, delete, replace)

        return self.dp[m][n]

    def min_dist_top_down(self, word1: str, word2: str) -> int:
        """
        Calculate the edit distance with memoized recursion.

        Args:
            word1: The source string.
            word2: The target string.

        Returns:
            Minimum number of operations to transform word1 into word2.

        Examples:
            >>> EditDistance().min_dist_top_down("intention", "execution")
            5
            >>> EditDistance().min_dist_top_down("intention", "")
            9
            >>> EditDistance().min_dist_top_down("", "")
            0
            >>> EditDistance().min_dist_top_down("kitten", "sitting")
            3
        """
        self.word1 = word1
        self.word2 = word2
        # -1 marks a cell whose sub-problem has not been solved yet.
        self.dp = [[-1] * len(word2) for _ in range(len(word1))]

        return self.__min_dist_top_down_dp(len(word1) - 1, len(word2) - 1)

    def min_dist_bottom_up(self, word1: str, word2: str) -> int:
        """
        Calculate the edit distance with an iteratively filled table.

        dp[i][j] holds the edit distance between the first i characters of
        word1 and the first j characters of word2.

        Args:
            word1: The source string.
            word2: The target string.

        Returns:
            Minimum number of operations to transform word1 into word2.

        Examples:
            >>> EditDistance().min_dist_bottom_up("intention", "execution")
            5
            >>> EditDistance().min_dist_bottom_up("intention", "")
            9
            >>> EditDistance().min_dist_bottom_up("", "")
            0
            >>> EditDistance().min_dist_bottom_up("kitten", "sitting")
            3
            >>> EditDistance().min_dist_bottom_up("horse", "ros")
            3
        """
        self.word1 = word1
        self.word2 = word2
        m = len(word1)
        n = len(word2)

        # Seed the borders once instead of re-testing them in the inner loop:
        # row 0 is "insert j characters", column 0 is "delete i characters".
        self.dp = [list(range(n + 1))]
        self.dp.extend([i] + [0] * n for i in range(1, m + 1))

        for i in range(1, m + 1):
            row = self.dp[i]
            above = self.dp[i - 1]
            for j in range(1, n + 1):
                if word1[i - 1] == word2[j - 1]:
                    # Last characters are equal: no extra operation needed.
                    row[j] = above[j - 1]
                else:
                    insert = row[j - 1]
                    delete = above[j]
                    replace = above[j - 1]
                    row[j] = 1 + min(insert, delete, replace)

        return self.dp[m][n]
88176
0 commit comments