diff options
Diffstat (limited to 'ass1/editdist.py')
| -rw-r--r-- | ass1/editdist.py | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/ass1/editdist.py b/ass1/editdist.py new file mode 100644 index 0000000..ae5b0d1 --- /dev/null +++ b/ass1/editdist.py @@ -0,0 +1,89 @@ +import sys + +OUTPUT_PATH = "output_editdist.txt" + +def compare(string, i, end): + for j in range(end): + if i+j == end or string[i+j] != string[j]: + return j + + +def gusfield(string): + z = [0 for _ in string] + z[0] = len(string) + r = 0 + l = 0 + for i in range(1, len(string)): + if i == 1: # base case + z[1] = compare(string, i, len(string)) + if z[1] > 0: + r = z[1] + 1 + l = 1 + elif i > r: # Case 1 + z[i] = compare(string, i, len(string)) + if z[i] > 0: + q = i + z[i] + r = q - 1 + l = i + elif i <= r: # Case 2 + if z[i-l] < r-i: # Case 2a + z[i] = z[i-l] + else: # Case 2b + q = compare(string, i, len(string)) + z[i] = q + r = q + l = i + return z + + +def reverse(string): + return string[::-1] + # new = " " * len(string) + # for i in range(len(new)): + # new[i] = string[-i] + + # return [string[c] for c in range(len(string)-1, -1, -1)] + + +def comapre_zs(z_prefix, z_suffix, pat_length): + with open(OUTPUT_PATH, "w") as file: + for i in range(pat_length+1, len(z_prefix)-pat_length-1): + distance = None + if z_prefix[i] == pat_length: + distance = 0 + elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1: + distance = 1 + if distance is not None: + file.write(f"{i-pat_length-1} {distance}\n") + print(f"===={i-pat_length-1} {distance}") + print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1]) + + + +def edit_distance(txt, pat): + forwards = pat + "$" + txt + backwards = reverse(pat) + "$" + txt + print(list(forwards)) + print(list(backwards)) + print(gusfield(forwards)) + print(gusfield(backwards)) + print("="*15) + comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + +def read_args(): + with open(sys.argv[1], "r") as txt_file: + txt = txt_file.read() + with open(sys.argv[2], "r") as pat_file: + pat = pat_file.read() + return txt, pat + +def main(): + if len(sys.argv) < 3: + print("Not enough arguments!") + else: + txt, pat = read_args() + edit_distance(txt, pat) + +if __name__ == '__main__': + main() + |
