# Alexander Occhipinti - 29994705 - FIT3155 S1/2021 import sys def compare(string, i, end): for j in range(end): if i+j == end or string[i+j] != string[j]: return j def gusfield(string): z = [0 for _ in string] z[0] = len(string) r = 0 l = 0 for i in range(1, len(string)): if i == 1: # base case z[1] = compare(string, i, len(string)) if z[1] > 0: r = z[1] + 1 l = 1 elif i > r: # Case 1 z[i] = compare(string, i, len(string)) if z[i] > 0: q = i + z[i] r = q - 1 l = i elif i <= r: # Case 2 if z[i-l] < r-i: # Case 2a z[i] = z[i-l] else: # Case 2b q = compare(string, i, len(string)) z[i] = q r = q l = i return z def reverse(string): return string[::-1] def match(z_prefix, z_suffix, pat_length): matches = [] l = pat_length+1 r = len(z_prefix)-1 while l < len(z_prefix): # while l hasn't reached the end if z_prefix[l] != 0 and z_suffix[r] != 0: distance = None if z_prefix[l] == pat_length: distance = 0 # The left and right pointer align at an exact pattern match elif z_prefix[l] + z_suffix[r] >= pat_length - 1: distance = 1 # The left and right pointer align at a match of distance one if distance is not None: matches.append((l-pat_length-1, distance)) l += 1 r += 1 # Progress each pointer until matches are found if z_prefix[l] == 0: l += 1 if z_suffix[r] == 0: r -= 1 return matches def edit_distance(txt, pat): # Do Z algorithm twice, forwards (prefix) and backwards (suffix) forwards = pat + "$" + txt backwards = reverse(pat) + "$" + reverse(txt) # Match the Z_prefix values with the Z_suffix backwards tuples = match(gusfield(forwards), gusfield(backwards), len(pat)) print(f"Found {len(tuples)} matches of edit distance <= 1.") output(tuples) print("Done.") def output(tuples): OUTPUT_PATH = "output_editdist.txt" with open(OUTPUT_PATH, "w") as file: for pair in tuples: file.write(f"{pair[0]} {pair[1]}\n") def read_args(): with open(sys.argv[1], "r") as txt_file: txt = txt_file.read() with open(sys.argv[2], "r") as pat_file: pat = pat_file.read() return txt, pat def main(): if len(sys.argv) < 3: print("Not enough arguments!") else: txt, pat = read_args() edit_distance(txt, pat) main()