From a7d2f9a2cbaf5b4fc2135a37d80481105359c867 Mon Sep 17 00:00:00 2001 From: akiyamn Date: Tue, 23 Mar 2021 13:55:14 +1100 Subject: Boyer-Moore bad char rule --- ass1/binary_boyermoore.py | 27 +++++++++++++++++++++++++++ ass1/editdist.py | 31 ++++++++++++++++++++++++++----- ass1/output_editdist.txt | 1 - ass1/txtfile.txt | 2 +- 4 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 ass1/binary_boyermoore.py diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py new file mode 100644 index 0000000..1d60f52 --- /dev/null +++ b/ass1/binary_boyermoore.py @@ -0,0 +1,27 @@ +txt = "xpbctbxabacbxtbpqa" +pat = "tbapxab" +m = len(pat) +n = len(txt) +R = [m for _ in range(0, 26)] + +def alpha_number(char): + return ord(char) - 97 + +for i in range(m): + R[alpha_number(pat[i])] = i + +j = 0 +while j < n-m: + for i in range(m-1, -1, -1): + print(j) + match = pat[i] == txt[j+i] + if match: + pass + # print(i) + else: + mismatched = txt[j+i] + shift = i-R[alpha_number(mismatched)] + j += max(1, shift) + if j > n-m: + break +print(R) \ No newline at end of file diff --git a/ass1/editdist.py b/ass1/editdist.py index ae5b0d1..c00fb69 100644 --- a/ass1/editdist.py +++ b/ass1/editdist.py @@ -51,24 +51,45 @@ def comapre_zs(z_prefix, z_suffix, pat_length): distance = None if z_prefix[i] == pat_length: distance = 0 - elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1: + elif z_prefix[i] + z_suffix[i] >= pat_length-1: distance = 1 if distance is not None: file.write(f"{i-pat_length-1} {distance}\n") print(f"===={i-pat_length-1} {distance}") - print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1]) - + print(i-pat_length-1, z_prefix[i], z_suffix[i]) + +def match(z_prefix, z_suffix, pat_length): + matches = [] + l = pat_length+1 + r = len(z_prefix)-1 + while l < len(z_prefix): # while l hasn't reached the end + if z_prefix[l] != 0 and z_suffix[r] != 0: + distance = None + if z_prefix[l] == pat_length: + distance = 0 + elif z_prefix[l] + z_suffix[r] >= pat_length - 1: + distance = 1 + if distance is not None: + matches.append((l-pat_length-1, distance)) + l += 1 + r += 1 + if z_prefix[l] == 0: + l += 1 + if z_suffix[r] == 0: + r -= 1 + return matches def edit_distance(txt, pat): forwards = pat + "$" + txt - backwards = reverse(pat) + "$" + txt + backwards = reverse(pat) + "$" + reverse(txt) print(list(forwards)) print(list(backwards)) print(gusfield(forwards)) print(gusfield(backwards)) print("="*15) - comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + # comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + print(match(gusfield(forwards), gusfield(backwards), len(pat))) def read_args(): with open(sys.argv[1], "r") as txt_file: diff --git a/ass1/output_editdist.txt b/ass1/output_editdist.txt index 6e8183b..e69de29 100644 --- a/ass1/output_editdist.txt +++ b/ass1/output_editdist.txt @@ -1 +0,0 @@ -0 1 diff --git a/ass1/txtfile.txt b/ass1/txtfile.txt index 9414491..d3bda8f 100644 --- a/ass1/txtfile.txt +++ b/ass1/txtfile.txt @@ -1 +1 @@ -yyybcdyyy \ No newline at end of file +abxcdyabcdyacdyadbyyy \ No newline at end of file -- cgit v1.2.3