# bpy24. The Smith-Waterman algorithm in Python

### Here the high score, 12, happens to be the last column of last row. In general, this will not be true and the highest scoring entry could be located anywhere in the matrix. From the highest entry, we proceed to 0, using the pointer information in T. At each step to 0, we record the letter of sequence 1 and sequence2. There might be cases it is null, where a gap is recorded for the particular entry. This is shown in the printouts, where sometimes the string ‘-‘ is added.

`# bpy24.pyfrom __future__ import division, print_functionimport numpy as nppt ={'match': 2, 'mismatch': -1, 'gap': -1}def mch(alpha, beta):    if alpha == beta:        return pt['match']    elif alpha == '-' or beta == '-':        return pt['gap']    else:        return pt['mismatch']def water(s1, s2):    m, n = len(s1), len(s2)    H = np.zeros((m+1, n+1))        T = np.zeros((m+1, n+1))    max_score = 0    # Score, Pointer Matrix    for i in range(1, m + 1):        for j in range(1, n + 1):            sc_diag = H[i-1][j-1] + mch(s1[i-1], s2[j-1])            sc_up = H[i][j-1] + pt['gap']            sc_left = H[i-1][j] + pt['gap']            H[i][j] = max(0,sc_left, sc_up, sc_diag)            if H[i][j] == 0: T[i][j] = 0            if H[i][j] == sc_left: T[i][j] = 1            if H[i][j] == sc_up: T[i][j] = 2            if H[i][j] == sc_diag: T[i][j] = 3            if H[i][j] >= max_score:                max_i = i                max_j = j                max_score = H[i][j];        print('H=n',H,'n')    print('T=n',T,'n')    align1, align2 = '', ''    i,j = max_i,max_j        #Traceback    while T[i][j] != 0:        if T[i][j] == 3:            a1 = s1[i-1]            a2 = s2[j-1]            i -= 1            j -= 1        elif T[i][j] == 2:            a1 = '-'            a2 = s2[j-1]            j -= 1        elif T[i][j] == 1:            a1 = s1[i-1]            a2 = '-'            i -= 1        print('%s ---> a1 = %st a2 = %sn' % ('Add',a1,a2))        align1 += a1        align2 += a2    align1 = align1[::-1]    align2 = align2[::-1]    sym = ''    iden = 0    for i in range(len(align1)):        a1 = align1[i]        a2 = align2[i]        if a1 == a2:                            sym += a1            iden += 1        elif a1 != a2 and a1 != '-' and a2 != '-':             sym += ' '        elif a1 == '-' or a2 == '-':                      sym += ' '        identity = iden / len(align1) * 100    print('Identity = %f percent' % identity)    print('Score =', max_score)    print(align1)    print(sym)    print(align2)if __name__ == '__main__':    water('AGCACACA','ACACACTA')#H=# [[  0.   0.   0.   0.   0.   0.   0.   0.   0.]# [  0.   2.   1.   2.   1.   2.   1.   0.   2.]# [  0.   1.   1.   1.   1.   1.   1.   0.   1.]# [  0.   0.   3.   2.   3.   2.   3.   2.   1.]# [  0.   2.   2.   5.   4.   5.   4.   3.   4.]# [  0.   1.   4.   4.   7.   6.   7.   6.   5.]# [  0.   2.   3.   6.   6.   9.   8.   7.   8.]# [  0.   1.   4.   5.   8.   8.  11.  10.   9.]# [  0.   2.   3.   6.   7.  10.  10.  10.  12.]] ##T=# [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.]# [ 0.  3.  2.  3.  2.  3.  2.  2.  3.]# [ 0.  1.  3.  1.  3.  1.  3.  3.  1.]# [ 0.  1.  3.  2.  3.  2.  3.  2.  2.]# [ 0.  3.  1.  3.  2.  3.  2.  2.  3.]# [ 0.  1.  3.  1.  3.  2.  3.  2.  2.]# [ 0.  3.  1.  3.  1.  3.  2.  2.  3.]# [ 0.  1.  3.  1.  3.  1.  3.  2.  2.]# [ 0.  3.  1.  3.  1.  3.  1.  3.  3.]] ##Add ---> a1 = A  a2 = A##Add ---> a1 = -  a2 = T##Add ---> a1 = C  a2 = C##Add ---> a1 = A  a2 = A##Add ---> a1 = C  a2 = C##Add ---> a1 = A  a2 = A##Add ---> a1 = C  a2 = C##Add ---> a1 = G  a2 = -##Add ---> a1 = A  a2 = A##Identity = 77.777778 percent#Score = 12.0#AGCACAC-A#A CACAC A#A-CACACTA`