From 8383d43a76d3d6b9eb47500f1920ddd08fff4df3 Mon Sep 17 00:00:00 2001 From: akiyamn Date: Wed, 24 Mar 2021 19:47:08 +1100 Subject: Boyer-Moore but I made my own rule and 2D shift table --- ass1/binary_boyermoore.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index a7193ad..0c48b78 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -1,9 +1,8 @@ -txt = "zzzzaaabcdabcdazzzz" -pat = "abcda" -# pat = "abcda" +txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab" +pat = "baab" m = len(pat) n = len(txt) -R = [m for _ in range(0, 26)] +R = [[m for __ in range(m)] for _ in range(0, 26)] good_suffix = [0 for _ in range(0, m+1)] def alpha_number(char): @@ -47,10 +46,11 @@ def gusfield(string): l = i return z +for j in range(m): + for i in range(j): + print(pat[i], i) + R[alpha_number(pat[i])][j] = i -for i in range(m): - print(pat[i], i) - R[alpha_number(pat[i])] = i Z = reverse(gusfield(reverse(pat)))+[0] print(list(pat)) @@ -70,10 +70,12 @@ for i in range(m-1, -1, -1): print(matched_prefix) print("="*15) + print(5*" " + txt) j = 0 -while j < n-m: +occurence = 0 +while j <= n-m: matches = 0 for i in range(m-1, -1, -1): print(f"{j=} {' ' * j}", end="") @@ -83,7 +85,6 @@ while j < n-m: else: print(pat[x], end="") print() - # print(j) match = pat[i] == txt[j+i] if match: matches += 1 @@ -92,11 +93,12 @@ while j < n-m: good_suffix_shift = m - matched_prefix[1] j += good_suffix_shift i = m-1 + occurence += 1 break # print(i) else: mismatched = txt[j+i] - bad_char_shift = i-R[alpha_number(mismatched)] + bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] @@ -107,5 +109,10 @@ while j < n-m: good_suffix_shift = m - matched_prefix[1] j += max(good_suffix_shift, bad_char_shift) break - if j > n-m: - break + # if j > n-m: + # break +print(f"It found {occurence} occurences.") + +print(f"\n {list(range(m))}") +for i, a in enumerate(R): + print(chr(i+97), a) \ No newline at end of file -- cgit v1.2.3