Boyer-Moore but I made my own rule and 2D shift table

author: akiyamn 2021-03-24 19:47:08 +1100
committer: akiyamn 2021-03-24 19:47:08 +1100
commit: 8383d43a76d3d6b9eb47500f1920ddd08fff4df3 (patch)
tree: 2f22b1891b4c8019c8576bacc69c1f1c95fe2a1b
parent: f0cdffeacc7e1996ac1fa97647105701f9300ef8 (diff)
download: fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.tar.gz
fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.zip
1 files changed, 19 insertions, 12 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index a7193ad..0c48b78 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -1,9 +1,8 @@
-txt = "zzzzaaabcdabcdazzzz"
-pat = "abcda"
-# pat = "abcda"
+txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab"
+pat = "baab"
 m = len(pat)
 n = len(txt)
-R = [m for _ in range(0, 26)]
+R = [[m for __ in range(m)] for _ in range(0, 26)]
 good_suffix = [0 for _ in range(0, m+1)]
 
 def alpha_number(char):
@@ -47,10 +46,11 @@ def gusfield(string):
                 l = i
     return z
 
+for j in range(m):
+    for i in range(j):
+        print(pat[i], i)
+        R[alpha_number(pat[i])][j] = i
 
-for i in range(m):
-    print(pat[i], i)
-    R[alpha_number(pat[i])] = i
 
 Z = reverse(gusfield(reverse(pat)))+[0]
 print(list(pat))
@@ -70,10 +70,12 @@ for i in range(m-1, -1, -1):
 print(matched_prefix)
 
 print("="*15)
+
 print(5*" " + txt)
 
 j = 0
-while j < n-m:
+occurence = 0
+while j <= n-m:
     matches = 0
     for i in range(m-1, -1, -1):
         print(f"{j=}  {' ' * j}", end="")
@@ -83,7 +85,6 @@ while j < n-m:
             else:
                 print(pat[x], end="")
         print()
-        # print(j)
         match = pat[i] == txt[j+i]
         if match:
             matches += 1
@@ -92,11 +93,12 @@ while j < n-m:
                 good_suffix_shift = m - matched_prefix[1]
                 j += good_suffix_shift
                 i = m-1
+                occurence += 1
                 break
             # print(i)
         else:
             mismatched = txt[j+i]
-            bad_char_shift = i-R[alpha_number(mismatched)]
+            bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R
             good_suffix_shift = 1
             if good_suffix[i+1] > 0:
                 good_suffix_shift = m - good_suffix[i+1]
@@ -107,5 +109,10 @@ while j < n-m:
                 good_suffix_shift = m - matched_prefix[1]
             j += max(good_suffix_shift, bad_char_shift)
             break
-            if j > n-m:
-                break
+            # if j > n-m:
+            #     break
+print(f"It found {occurence} occurences.")
+
+print(f"\n  {list(range(m))}")
+for i, a in enumerate(R):
+    print(chr(i+97), a)
+\ No newline at end of file
author	akiyamn	2021-03-24 19:47:08 +1100
committer	akiyamn	2021-03-24 19:47:08 +1100
commit	8383d43a76d3d6b9eb47500f1920ddd08fff4df3 (patch)
tree	2f22b1891b4c8019c8576bacc69c1f1c95fe2a1b
parent	f0cdffeacc7e1996ac1fa97647105701f9300ef8 (diff)
download	fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.tar.gz fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.zip