# txt = "zzzzzzzzzabczzzzzzzzzz" # pat = "abczzzabc" # m = len(pat) # n = len(txt) # R = [[m for __ in range(m)] for _ in range(0, 26)] # good_suffix = [0 for _ in range(0, m+1)] def alpha_number(char): return ord(char) - 97 def reverse(string): return string[::-1] def compare(string, i, end): for j in range(end): if i+j == end or string[i+j] != string[j]: return j def gusfield(string): z = [0 for _ in string] z[0] = len(string) r = 0 l = 0 for i in range(1, len(string)): if i == 1: # base case z[1] = compare(string, i, len(string)) if z[1] > 0: r = z[1] + 1 l = 1 elif i > r: # Case 1 z[i] = compare(string, i, len(string)) if z[i] > 0: q = i + z[i] r = q - 1 l = i elif i <= r: # Case 2 if z[i-l] < r-i: # Case 2a z[i] = z[i-l] else: # Case 2b q = compare(string, i, len(string)) z[i] = q r = q l = i return z def gen_jump_table(pat): m = len(pat) R = [[-1 for __ in range(m)] for _ in range(0, 26)] for j in range(m): for i in range(j): R[alpha_number(pat[i])][j] = i return R def gen_z_suffix(pat): return reverse(gusfield(reverse(pat)))+[0] # print(list(pat)) # print(R) # print(Z) def gen_good_suffix(pat, Z): m = len(pat) good_suffix = [0 for _ in range(0, m + 1)] for i in range(m): j = m - Z[i] good_suffix[j] = i+1 return good_suffix # print("g", good_suffix) def gen_matched_prefix(pat): m = len(pat) matched_prefix = gusfield(pat)+[0] for i in range(m-1, -1, -1): matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1]) return matched_prefix def preprocess(pat): R = gen_jump_table(pat) Z = gen_z_suffix(pat) good_suffix = gen_good_suffix(pat, Z) matched_prefix = gen_matched_prefix(pat) return R, good_suffix, matched_prefix def boyer_moore(pat, txt): R, good_suffix, matched_prefix = preprocess(pat) m = len(pat) n = len(txt) j = 0 occurrence = 0 start = None stop = None galil_index = -1 print("="*15) print(6*" " + txt) while j <= n-m: matches = 0 for i in range(m-1, -1, -1): # if galil_index == j and start <= i < stop: # print("pee") # i = max(start + 1, 1) # continue print(f"{j=:02} {' ' * j}", end="") for x in range(len(pat)): if x == i: print(pat[x].upper(), end="") else: print(pat[x], end="") print() match = pat[i] == txt[j+i] if match: matches += 1 if matches == m: good_suffix_shift = m - matched_prefix[1] j += good_suffix_shift i = m-1 occurrence += 1 break else: mismatched = txt[j+i] bad_char_shift = i-R[alpha_number(mismatched)][i] good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] start = good_suffix[i+1] - m + i stop = good_suffix[i+1] next_galil = j+1 elif good_suffix[i+1] == 0: good_suffix_shift = m - matched_prefix[i+1] start = 1 stop = matched_prefix[i+1] next_galil = j+1 elif matches == m: print(f"Match at {j}") good_suffix_shift = m - matched_prefix[1] # print(start, stop) j += max(good_suffix_shift, bad_char_shift) if good_suffix[i+1] >= 0 and good_suffix_shift >= bad_char_shift: galil_index = j break print(f"It found {occurrence} occurences.") print(f"\n {list(range(m))}") for i, a in enumerate(R): print(chr(i+97), a) boyer_moore("abcdxxxabcd", "zzzzzzabcdxxxaaabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdabcdxxxabcdabcdxxxabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdzzzzabcdxxxabcdabcdxxxabcdzabcdxxxabcdzzzzzzzzzz")