aboutsummaryrefslogtreecommitdiff
path: root/ass1/binary_boyermoore.py
diff options
context:
space:
mode:
Diffstat (limited to 'ass1/binary_boyermoore.py')
-rw-r--r--ass1/binary_boyermoore.py166
1 files changed, 93 insertions, 73 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index 0c48b78..42d7d01 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -1,9 +1,9 @@
-txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab"
-pat = "baab"
-m = len(pat)
-n = len(txt)
-R = [[m for __ in range(m)] for _ in range(0, 26)]
-good_suffix = [0 for _ in range(0, m+1)]
+# txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab"
+# pat = "baab"
+# m = len(pat)
+# n = len(txt)
+# R = [[m for __ in range(m)] for _ in range(0, 26)]
+# good_suffix = [0 for _ in range(0, m+1)]
def alpha_number(char):
return ord(char) - 97
@@ -46,73 +46,93 @@ def gusfield(string):
l = i
return z
-for j in range(m):
- for i in range(j):
- print(pat[i], i)
- R[alpha_number(pat[i])][j] = i
-
-
-Z = reverse(gusfield(reverse(pat)))+[0]
-print(list(pat))
-print(R)
-print(Z)
-
-for i in range(m):
- j = m - Z[i]
- good_suffix[j] = i+1
-
-print("g", good_suffix)
-
-matched_prefix = gusfield(pat)+[0]
-for i in range(m-1, -1, -1):
- matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1])
-
-print(matched_prefix)
-
-print("="*15)
-
-print(5*" " + txt)
-
-j = 0
-occurence = 0
-while j <= n-m:
- matches = 0
+def gen_jump_table(pat):
+ m = len(pat)
+ R = [[m for __ in range(m)] for _ in range(0, 26)]
+ for j in range(m):
+ for i in range(j):
+ R[alpha_number(pat[i])][j] = i
+ return R
+
+def gen_z_suffix(pat):
+ return reverse(gusfield(reverse(pat)))+[0]
+
+# print(list(pat))
+# print(R)
+# print(Z)
+
+def gen_good_suffix(pat, Z):
+ m = len(pat)
+ good_suffix = [0 for _ in range(0, m + 1)]
+ for i in range(m):
+ j = m - Z[i]
+ good_suffix[j] = i+1
+ return good_suffix
+
+# print("g", good_suffix)
+
+def gen_matched_prefix(pat):
+ m = len(pat)
+ matched_prefix = gusfield(pat)+[0]
for i in range(m-1, -1, -1):
- print(f"{j=} {' ' * j}", end="")
- for x in range(len(pat)):
- if x == i:
- print(pat[x].upper(), end="")
+ matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1])
+ return matched_prefix
+
+
+def preprocess(pat):
+ R = gen_jump_table(pat)
+ Z = gen_z_suffix(pat)
+ good_suffix = gen_good_suffix(pat, Z)
+ matched_prefix = gen_matched_prefix(pat)
+ return R, good_suffix, matched_prefix
+
+def boyer_moore(pat, txt):
+ R, good_suffix, matched_prefix = preprocess(pat)
+ m = len(pat)
+ n = len(txt)
+ j = 0
+ occurrence = 0
+
+ print("="*15)
+ print(5*" " + txt)
+
+ while j <= n-m:
+ matches = 0
+ for i in range(m-1, -1, -1):
+ print(f"{j=} {' ' * j}", end="")
+ for x in range(len(pat)):
+ if x == i:
+ print(pat[x].upper(), end="")
+ else:
+ print(pat[x], end="")
+ print()
+ match = pat[i] == txt[j+i]
+ if match:
+ matches += 1
+ if matches == m:
+ good_suffix_shift = m - matched_prefix[1]
+ j += good_suffix_shift
+ i = m-1
+ occurrence += 1
+ break
else:
- print(pat[x], end="")
- print()
- match = pat[i] == txt[j+i]
- if match:
- matches += 1
- if matches == m:
- # print(f" Match at {j}", end="")
- good_suffix_shift = m - matched_prefix[1]
- j += good_suffix_shift
- i = m-1
- occurence += 1
+ mismatched = txt[j+i]
+ bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R
+ good_suffix_shift = 1
+ if good_suffix[i+1] > 0:
+ good_suffix_shift = m - good_suffix[i+1]
+ elif good_suffix[i+1] == 0:
+ good_suffix_shift = m - matched_prefix[i+1]
+ elif matches == m:
+ print(f"Match at {j}")
+ good_suffix_shift = m - matched_prefix[1]
+ j += max(good_suffix_shift, bad_char_shift)
break
- # print(i)
- else:
- mismatched = txt[j+i]
- bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R
- good_suffix_shift = 1
- if good_suffix[i+1] > 0:
- good_suffix_shift = m - good_suffix[i+1]
- elif good_suffix[i+1] == 0:
- good_suffix_shift = m - matched_prefix[i+1]
- elif matches == m:
- print(f"Match at {j}")
- good_suffix_shift = m - matched_prefix[1]
- j += max(good_suffix_shift, bad_char_shift)
- break
- # if j > n-m:
- # break
-print(f"It found {occurence} occurences.")
-
-print(f"\n {list(range(m))}")
-for i, a in enumerate(R):
- print(chr(i+97), a) \ No newline at end of file
+
+ print(f"It found {occurrence} occurences.")
+ print(f"\n {list(range(m))}")
+ for i, a in enumerate(R):
+ print(chr(i+97), a)
+
+pat = "abbabba"
+boyer_moore(pat, "zzzzzzzzzzzzzzzabbazzzzzzzzzzzzzzzzzzzzz") \ No newline at end of file