aboutsummaryrefslogtreecommitdiff
path: root/ass1/binary_boyermoore.py
diff options
context:
space:
mode:
authorakiyamn2021-03-26 17:44:22 +1100
committerakiyamn2021-03-26 17:44:22 +1100
commitde0876157c4271a2ff271117ebd76bcd8d94d6da (patch)
tree4c1dc157f25dc719fbf23ca059bb34c3209cf865 /ass1/binary_boyermoore.py
parent61e3ed968cc6fccc7dfa749f6a86ef3175917252 (diff)
downloadfit3155-bm_bytes.tar.gz
fit3155-bm_bytes.zip
BM: I think Galil's is workingbm_bytes
Diffstat (limited to 'ass1/binary_boyermoore.py')
-rw-r--r--ass1/binary_boyermoore.py36
1 files changed, 29 insertions, 7 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index e826016..380eacf 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -100,21 +100,23 @@ def preprocess(pat):
matched_prefix = gen_matched_prefix(pat)
return R, good_suffix, matched_prefix
+
+
def boyer_moore(pat, txt):
R, good_suffix, matched_prefix = preprocess(pat)
m = len(pat)
n = len(txt)
j = 0
occurrence = 0
- start = None
- stop = None
- galil_index = -1
+ galils = 0
comps = 0
# print("="*15)
# print(6*" " + txt)
i = m-1
-
+ galil = False
+ start = None
+ stop = None
while j <= n-m:
# print(f"{j=:02} {' ' * j}", end="")
# for x in range(len(pat)):
@@ -123,10 +125,14 @@ def boyer_moore(pat, txt):
# else:
# print(pat[x], end="")
# print()
-
match = pat[i] == txt[j+i]
comps += 1
if match:
+ if galil and stop >= i > start:
+ galils += 1
+ assert start >= 0
+ i = max(start-1, 0)
+ galil = False
if i == 0:
good_suffix_shift = m - matched_prefix[1]
j += good_suffix_shift
@@ -135,18 +141,30 @@ def boyer_moore(pat, txt):
else:
i -= 1
else:
+ galil = False
+ mp = False
+ gs = False
mismatched = txt[j + i]
bad_char_shift = i - R[alpha_number(mismatched)][i]
good_suffix_shift = 1
if good_suffix[i+1] > 0:
good_suffix_shift = m - good_suffix[i+1]
+ gs = True
+ start = good_suffix[i+1] - m + i + 1
+ stop = good_suffix[i+1]
elif good_suffix[i+1] == 0:
good_suffix_shift = m - matched_prefix[i+1]
- j += max(good_suffix_shift, bad_char_shift)
+ mp = True
+ start = 0
+ stop = matched_prefix[i + 1]
+ best_shift = max(good_suffix_shift, bad_char_shift)
+ j += best_shift
+ galil = (mp or gs) and best_shift == good_suffix_shift
i = m-1
print(f"It found {occurrence} occurences.")
+ print(f"Galil'd {galils} times.")
# print(f"\n {list(range(m))}")
# print("" + str(list(map(int, pat))))
# for i, a in enumerate(R):
@@ -190,7 +208,7 @@ def read_args():
# boyer_moore(condense("01100011"), condense(a, offset=1))
def main():
- F = 3
+ F = 2
if len(sys.argv) < 3:
print("Not enough arguments!")
else:
@@ -198,6 +216,10 @@ def main():
chunky_search(pat, txt, factor=F)
main()
+
+
+
+
# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
# print(condense("1110000110"))
# print(condense("1110000110", offset=1))