aboutsummaryrefslogtreecommitdiff
path: root/ass1
diff options
context:
space:
mode:
authorakiyamn2021-03-24 19:47:08 +1100
committerakiyamn2021-03-24 19:47:08 +1100
commit8383d43a76d3d6b9eb47500f1920ddd08fff4df3 (patch)
tree2f22b1891b4c8019c8576bacc69c1f1c95fe2a1b /ass1
parentf0cdffeacc7e1996ac1fa97647105701f9300ef8 (diff)
downloadfit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.tar.gz
fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.zip
Boyer-Moore but I made my own rule and 2D shift table
Diffstat (limited to 'ass1')
-rw-r--r--ass1/binary_boyermoore.py31
1 files changed, 19 insertions, 12 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index a7193ad..0c48b78 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -1,9 +1,8 @@
-txt = "zzzzaaabcdabcdazzzz"
-pat = "abcda"
-# pat = "abcda"
+txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab"
+pat = "baab"
m = len(pat)
n = len(txt)
-R = [m for _ in range(0, 26)]
+R = [[m for __ in range(m)] for _ in range(0, 26)]
good_suffix = [0 for _ in range(0, m+1)]
def alpha_number(char):
@@ -47,10 +46,11 @@ def gusfield(string):
l = i
return z
+for j in range(m):
+ for i in range(j):
+ print(pat[i], i)
+ R[alpha_number(pat[i])][j] = i
-for i in range(m):
- print(pat[i], i)
- R[alpha_number(pat[i])] = i
Z = reverse(gusfield(reverse(pat)))+[0]
print(list(pat))
@@ -70,10 +70,12 @@ for i in range(m-1, -1, -1):
print(matched_prefix)
print("="*15)
+
print(5*" " + txt)
j = 0
-while j < n-m:
+occurence = 0
+while j <= n-m:
matches = 0
for i in range(m-1, -1, -1):
print(f"{j=} {' ' * j}", end="")
@@ -83,7 +85,6 @@ while j < n-m:
else:
print(pat[x], end="")
print()
- # print(j)
match = pat[i] == txt[j+i]
if match:
matches += 1
@@ -92,11 +93,12 @@ while j < n-m:
good_suffix_shift = m - matched_prefix[1]
j += good_suffix_shift
i = m-1
+ occurence += 1
break
# print(i)
else:
mismatched = txt[j+i]
- bad_char_shift = i-R[alpha_number(mismatched)]
+ bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R
good_suffix_shift = 1
if good_suffix[i+1] > 0:
good_suffix_shift = m - good_suffix[i+1]
@@ -107,5 +109,10 @@ while j < n-m:
good_suffix_shift = m - matched_prefix[1]
j += max(good_suffix_shift, bad_char_shift)
break
- if j > n-m:
- break
+ # if j > n-m:
+ # break
+print(f"It found {occurence} occurences.")
+
+print(f"\n {list(range(m))}")
+for i, a in enumerate(R):
+ print(chr(i+97), a) \ No newline at end of file