aboutsummaryrefslogtreecommitdiff
path: root/ass1
diff options
context:
space:
mode:
authorakiyamn2021-03-24 21:41:48 +1100
committerakiyamn2021-03-24 21:41:48 +1100
commit9a607cb9b8670400bc77406ac056df058880976c (patch)
tree2c8f18d53cff9f37258ee63a5239f5510da81e76 /ass1
parent556fe538ddd5580f966d31d3167ecc764434fcb9 (diff)
downloadfit3155-9a607cb9b8670400bc77406ac056df058880976c.tar.gz
fit3155-9a607cb9b8670400bc77406ac056df058880976c.zip
Attempting Galil's
Diffstat (limited to 'ass1')
-rw-r--r--ass1/binary_boyermoore.py32
1 files changed, 24 insertions, 8 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index 42d7d01..a75b9b2 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -1,5 +1,5 @@
-# txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab"
-# pat = "baab"
+# txt = "zzzzzzzzzabczzzzzzzzzz"
+# pat = "abczzzabc"
# m = len(pat)
# n = len(txt)
# R = [[m for __ in range(m)] for _ in range(0, 26)]
@@ -48,7 +48,7 @@ def gusfield(string):
def gen_jump_table(pat):
m = len(pat)
- R = [[m for __ in range(m)] for _ in range(0, 26)]
+ R = [[-1 for __ in range(m)] for _ in range(0, 26)]
for j in range(m):
for i in range(j):
R[alpha_number(pat[i])][j] = i
@@ -92,14 +92,22 @@ def boyer_moore(pat, txt):
n = len(txt)
j = 0
occurrence = 0
+ start = None
+ stop = None
+ galil_index = -1
print("="*15)
- print(5*" " + txt)
+ print(6*" " + txt)
while j <= n-m:
matches = 0
for i in range(m-1, -1, -1):
- print(f"{j=} {' ' * j}", end="")
+ # if galil_index == j and start <= i < stop:
+ # print("pee")
+ # i = max(start + 1, 1)
+ # continue
+
+ print(f"{j=:02} {' ' * j}", end="")
for x in range(len(pat)):
if x == i:
print(pat[x].upper(), end="")
@@ -117,16 +125,25 @@ def boyer_moore(pat, txt):
break
else:
mismatched = txt[j+i]
- bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R
+ bad_char_shift = i-R[alpha_number(mismatched)][i]
good_suffix_shift = 1
if good_suffix[i+1] > 0:
good_suffix_shift = m - good_suffix[i+1]
+ start = good_suffix[i+1] - m + i
+ stop = good_suffix[i+1]
+ next_galil = j+1
elif good_suffix[i+1] == 0:
good_suffix_shift = m - matched_prefix[i+1]
+ start = 1
+ stop = matched_prefix[i+1]
+ next_galil = j+1
elif matches == m:
print(f"Match at {j}")
good_suffix_shift = m - matched_prefix[1]
+ # print(start, stop)
j += max(good_suffix_shift, bad_char_shift)
+ if good_suffix[i+1] >= 0 and good_suffix_shift >= bad_char_shift:
+ galil_index = j
break
print(f"It found {occurrence} occurences.")
@@ -134,5 +151,4 @@ def boyer_moore(pat, txt):
for i, a in enumerate(R):
print(chr(i+97), a)
-pat = "abbabba"
-boyer_moore(pat, "zzzzzzzzzzzzzzzabbazzzzzzzzzzzzzzzzzzzzz") \ No newline at end of file
+boyer_moore("abcdxxxabcd", "zzzzzzabcdxxxaaabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdabcdxxxabcdabcdxxxabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdzzzzabcdxxxabcdabcdxxxabcdzabcdxxxabcdzzzzzzzzzz") \ No newline at end of file