import sys OUTPUT_PATH = "output_editdist.txt" def compare(string, i, end): for j in range(end): if i+j == end or string[i+j] != string[j]: return j def gusfield(string): z = [0 for _ in string] z[0] = len(string) r = 0 l = 0 for i in range(1, len(string)): if i == 1: # base case z[1] = compare(string, i, len(string)) if z[1] > 0: r = z[1] + 1 l = 1 elif i > r: # Case 1 z[i] = compare(string, i, len(string)) if z[i] > 0: q = i + z[i] r = q - 1 l = i elif i <= r: # Case 2 if z[i-l] < r-i: # Case 2a z[i] = z[i-l] else: # Case 2b q = compare(string, i, len(string)) z[i] = q r = q l = i return z def reverse(string): return string[::-1] # new = " " * len(string) # for i in range(len(new)): # new[i] = string[-i] # return [string[c] for c in range(len(string)-1, -1, -1)] def comapre_zs(z_prefix, z_suffix, pat_length): with open(OUTPUT_PATH, "w") as file: for i in range(pat_length+1, len(z_prefix)-pat_length-1): distance = None if z_prefix[i] == pat_length: distance = 0 elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1: distance = 1 if distance is not None: file.write(f"{i-pat_length-1} {distance}\n") print(f"===={i-pat_length-1} {distance}") print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1]) def edit_distance(txt, pat): forwards = pat + "$" + txt backwards = reverse(pat) + "$" + txt print(list(forwards)) print(list(backwards)) print(gusfield(forwards)) print(gusfield(backwards)) print("="*15) comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) def read_args(): with open(sys.argv[1], "r") as txt_file: txt = txt_file.read() with open(sys.argv[2], "r") as pat_file: pat = pat_file.read() return txt, pat def main(): if len(sys.argv) < 3: print("Not enough arguments!") else: txt, pat = read_args() edit_distance(txt, pat) if __name__ == '__main__': main()