From 9a607cb9b8670400bc77406ac056df058880976c Mon Sep 17 00:00:00 2001 From: akiyamn Date: Wed, 24 Mar 2021 21:41:48 +1100 Subject: Attempting Galil's --- ass1/binary_boyermoore.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index 42d7d01..a75b9b2 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -1,5 +1,5 @@ -# txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab" -# pat = "baab" +# txt = "zzzzzzzzzabczzzzzzzzzz" +# pat = "abczzzabc" # m = len(pat) # n = len(txt) # R = [[m for __ in range(m)] for _ in range(0, 26)] @@ -48,7 +48,7 @@ def gusfield(string): def gen_jump_table(pat): m = len(pat) - R = [[m for __ in range(m)] for _ in range(0, 26)] + R = [[-1 for __ in range(m)] for _ in range(0, 26)] for j in range(m): for i in range(j): R[alpha_number(pat[i])][j] = i @@ -92,14 +92,22 @@ def boyer_moore(pat, txt): n = len(txt) j = 0 occurrence = 0 + start = None + stop = None + galil_index = -1 print("="*15) - print(5*" " + txt) + print(6*" " + txt) while j <= n-m: matches = 0 for i in range(m-1, -1, -1): - print(f"{j=} {' ' * j}", end="") + # if galil_index == j and start <= i < stop: + # print("pee") + # i = max(start + 1, 1) + # continue + + print(f"{j=:02} {' ' * j}", end="") for x in range(len(pat)): if x == i: print(pat[x].upper(), end="") @@ -117,16 +125,25 @@ def boyer_moore(pat, txt): break else: mismatched = txt[j+i] - bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R + bad_char_shift = i-R[alpha_number(mismatched)][i] good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] + start = good_suffix[i+1] - m + i + stop = good_suffix[i+1] + next_galil = j+1 elif good_suffix[i+1] == 0: good_suffix_shift = m - matched_prefix[i+1] + start = 1 + stop = matched_prefix[i+1] + next_galil = j+1 elif matches == m: print(f"Match at {j}") good_suffix_shift = m - matched_prefix[1] + # print(start, stop) j += max(good_suffix_shift, bad_char_shift) + if good_suffix[i+1] >= 0 and good_suffix_shift >= bad_char_shift: + galil_index = j break print(f"It found {occurrence} occurences.") @@ -134,5 +151,4 @@ def boyer_moore(pat, txt): for i, a in enumerate(R): print(chr(i+97), a) -pat = "abbabba" -boyer_moore(pat, "zzzzzzzzzzzzzzzabbazzzzzzzzzzzzzzzzzzzzz") \ No newline at end of file +boyer_moore("abcdxxxabcd", "zzzzzzabcdxxxaaabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdabcdxxxabcdabcdxxxabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdzzzzabcdxxxabcdabcdxxxabcdzabcdxxxabcdzzzzzzzzzz") \ No newline at end of file -- cgit v1.2.3