aboutsummaryrefslogtreecommitdiff
path: root/ass1/editdist.py
diff options
context:
space:
mode:
Diffstat (limited to 'ass1/editdist.py')
-rw-r--r--ass1/editdist.py89
1 files changed, 89 insertions, 0 deletions
diff --git a/ass1/editdist.py b/ass1/editdist.py
new file mode 100644
index 0000000..ae5b0d1
--- /dev/null
+++ b/ass1/editdist.py
@@ -0,0 +1,89 @@
+import sys
+
+OUTPUT_PATH = "output_editdist.txt"
+
+def compare(string, i, end):
+ for j in range(end):
+ if i+j == end or string[i+j] != string[j]:
+ return j
+
+
+def gusfield(string):
+ z = [0 for _ in string]
+ z[0] = len(string)
+ r = 0
+ l = 0
+ for i in range(1, len(string)):
+ if i == 1: # base case
+ z[1] = compare(string, i, len(string))
+ if z[1] > 0:
+ r = z[1] + 1
+ l = 1
+ elif i > r: # Case 1
+ z[i] = compare(string, i, len(string))
+ if z[i] > 0:
+ q = i + z[i]
+ r = q - 1
+ l = i
+ elif i <= r: # Case 2
+ if z[i-l] < r-i: # Case 2a
+ z[i] = z[i-l]
+ else: # Case 2b
+ q = compare(string, i, len(string))
+ z[i] = q
+ r = q
+ l = i
+ return z
+
+
+def reverse(string):
+ return string[::-1]
+ # new = " " * len(string)
+ # for i in range(len(new)):
+ # new[i] = string[-i]
+
+ # return [string[c] for c in range(len(string)-1, -1, -1)]
+
+
+def comapre_zs(z_prefix, z_suffix, pat_length):
+ with open(OUTPUT_PATH, "w") as file:
+ for i in range(pat_length+1, len(z_prefix)-pat_length-1):
+ distance = None
+ if z_prefix[i] == pat_length:
+ distance = 0
+ elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1:
+ distance = 1
+ if distance is not None:
+ file.write(f"{i-pat_length-1} {distance}\n")
+ print(f"===={i-pat_length-1} {distance}")
+ print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1])
+
+
+
+def edit_distance(txt, pat):
+ forwards = pat + "$" + txt
+ backwards = reverse(pat) + "$" + txt
+ print(list(forwards))
+ print(list(backwards))
+ print(gusfield(forwards))
+ print(gusfield(backwards))
+ print("="*15)
+ comapre_zs(gusfield(forwards), gusfield(backwards), len(pat))
+
+def read_args():
+ with open(sys.argv[1], "r") as txt_file:
+ txt = txt_file.read()
+ with open(sys.argv[2], "r") as pat_file:
+ pat = pat_file.read()
+ return txt, pat
+
+def main():
+ if len(sys.argv) < 3:
+ print("Not enough arguments!")
+ else:
+ txt, pat = read_args()
+ edit_distance(txt, pat)
+
+if __name__ == '__main__':
+ main()
+