diff options
| author | akiyamn | 2021-03-26 17:44:22 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-26 17:44:22 +1100 |
| commit | de0876157c4271a2ff271117ebd76bcd8d94d6da (patch) | |
| tree | 4c1dc157f25dc719fbf23ca059bb34c3209cf865 /ass1/binary_boyermoore.py | |
| parent | 61e3ed968cc6fccc7dfa749f6a86ef3175917252 (diff) | |
| download | fit3155-bm_bytes.tar.gz fit3155-bm_bytes.zip | |
BM: I think Galil's is workingbm_bytes
Diffstat (limited to 'ass1/binary_boyermoore.py')
| -rw-r--r-- | ass1/binary_boyermoore.py | 36 |
1 files changed, 29 insertions, 7 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index e826016..380eacf 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -100,21 +100,23 @@ def preprocess(pat): matched_prefix = gen_matched_prefix(pat) return R, good_suffix, matched_prefix + + def boyer_moore(pat, txt): R, good_suffix, matched_prefix = preprocess(pat) m = len(pat) n = len(txt) j = 0 occurrence = 0 - start = None - stop = None - galil_index = -1 + galils = 0 comps = 0 # print("="*15) # print(6*" " + txt) i = m-1 - + galil = False + start = None + stop = None while j <= n-m: # print(f"{j=:02} {' ' * j}", end="") # for x in range(len(pat)): @@ -123,10 +125,14 @@ def boyer_moore(pat, txt): # else: # print(pat[x], end="") # print() - match = pat[i] == txt[j+i] comps += 1 if match: + if galil and stop >= i > start: + galils += 1 + assert start >= 0 + i = max(start-1, 0) + galil = False if i == 0: good_suffix_shift = m - matched_prefix[1] j += good_suffix_shift @@ -135,18 +141,30 @@ def boyer_moore(pat, txt): else: i -= 1 else: + galil = False + mp = False + gs = False mismatched = txt[j + i] bad_char_shift = i - R[alpha_number(mismatched)][i] good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] + gs = True + start = good_suffix[i+1] - m + i + 1 + stop = good_suffix[i+1] elif good_suffix[i+1] == 0: good_suffix_shift = m - matched_prefix[i+1] - j += max(good_suffix_shift, bad_char_shift) + mp = True + start = 0 + stop = matched_prefix[i + 1] + best_shift = max(good_suffix_shift, bad_char_shift) + j += best_shift + galil = (mp or gs) and best_shift == good_suffix_shift i = m-1 print(f"It found {occurrence} occurences.") + print(f"Galil'd {galils} times.") # print(f"\n {list(range(m))}") # print("" + str(list(map(int, pat)))) # for i, a in enumerate(R): @@ -190,7 +208,7 @@ def read_args(): # boyer_moore(condense("01100011"), condense(a, offset=1)) def main(): - F = 3 + F = 2 if len(sys.argv) < 3: print("Not enough arguments!") else: @@ -198,6 +216,10 @@ def main(): chunky_search(pat, txt, factor=F) main() + + + + # boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011") # print(condense("1110000110")) # print(condense("1110000110", offset=1)) |
