python person name matching

Solutions on MaxInterview for python person name matching by the best coders in the world

showing results for - "python person name matching"
Amelia
19 Sep 2017
1import numpy as np
2def levenshtein_ratio_and_distance(s, t, ratio_calc = False):
3    """ levenshtein_ratio_and_distance:
4        Calculates levenshtein distance between two strings.
5        If ratio_calc = True, the function computes the
6        levenshtein distance ratio of similarity between two strings
7        For all i and j, distance[i,j] will contain the Levenshtein
8        distance between the first i characters of s and the
9        first j characters of t
10    """
11    # Initialize matrix of zeros
12    rows = len(s)+1
13    cols = len(t)+1
14    distance = np.zeros((rows,cols),dtype = int)
15
16    # Populate matrix of zeros with the indeces of each character of both strings
17    for i in range(1, rows):
18        for k in range(1,cols):
19            distance[i][0] = i
20            distance[0][k] = k
21
22    # Iterate over the matrix to compute the cost of deletions,insertions and/or substitutions    
23    for col in range(1, cols):
24        for row in range(1, rows):
25            if s[row-1] == t[col-1]:
26                cost = 0 # If the characters are the same in the two strings in a given position [i,j] then the cost is 0
27            else:
28                # In order to align the results with those of the Python Levenshtein package, if we choose to calculate the ratio
29                # the cost of a substitution is 2. If we calculate just distance, then the cost of a substitution is 1.
30                if ratio_calc == True:
31                    cost = 2
32                else:
33                    cost = 1
34            distance[row][col] = min(distance[row-1][col] + 1,      # Cost of deletions
35                                 distance[row][col-1] + 1,          # Cost of insertions
36                                 distance[row-1][col-1] + cost)     # Cost of substitutions
37    if ratio_calc == True:
38        # Computation of the Levenshtein Distance Ratio
39        Ratio = ((len(s)+len(t)) - distance[row][col]) / (len(s)+len(t))
40        return Ratio
41    else:
42        # print(distance) # Uncomment if you want to see the matrix showing how the algorithm computes the cost of deletions,
43        # insertions and/or substitutions
44        # This is the minimum number of edits needed to convert string a to string b
45        return "The strings are {} edits away".format(distance[row][col])
46
similar questions
queries leading to this page
python person name matching