diff options
| author | akiyamn | 2021-03-24 19:47:08 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-24 19:47:08 +1100 |
| commit | 8383d43a76d3d6b9eb47500f1920ddd08fff4df3 (patch) | |
| tree | 2f22b1891b4c8019c8576bacc69c1f1c95fe2a1b | |
| parent | f0cdffeacc7e1996ac1fa97647105701f9300ef8 (diff) | |
| download | fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.tar.gz fit3155-8383d43a76d3d6b9eb47500f1920ddd08fff4df3.zip | |
Boyer-Moore but I made my own rule and 2D shift table
| -rw-r--r-- | ass1/binary_boyermoore.py | 31 |
1 files changed, 19 insertions, 12 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index a7193ad..0c48b78 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -1,9 +1,8 @@ -txt = "zzzzaaabcdabcdazzzz" -pat = "abcda" -# pat = "abcda" +txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab" +pat = "baab" m = len(pat) n = len(txt) -R = [m for _ in range(0, 26)] +R = [[m for __ in range(m)] for _ in range(0, 26)] good_suffix = [0 for _ in range(0, m+1)] def alpha_number(char): @@ -47,10 +46,11 @@ def gusfield(string): l = i return z +for j in range(m): + for i in range(j): + print(pat[i], i) + R[alpha_number(pat[i])][j] = i -for i in range(m): - print(pat[i], i) - R[alpha_number(pat[i])] = i Z = reverse(gusfield(reverse(pat)))+[0] print(list(pat)) @@ -70,10 +70,12 @@ for i in range(m-1, -1, -1): print(matched_prefix) print("="*15) + print(5*" " + txt) j = 0 -while j < n-m: +occurence = 0 +while j <= n-m: matches = 0 for i in range(m-1, -1, -1): print(f"{j=} {' ' * j}", end="") @@ -83,7 +85,6 @@ while j < n-m: else: print(pat[x], end="") print() - # print(j) match = pat[i] == txt[j+i] if match: matches += 1 @@ -92,11 +93,12 @@ while j < n-m: good_suffix_shift = m - matched_prefix[1] j += good_suffix_shift i = m-1 + occurence += 1 break # print(i) else: mismatched = txt[j+i] - bad_char_shift = i-R[alpha_number(mismatched)] + bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] @@ -107,5 +109,10 @@ while j < n-m: good_suffix_shift = m - matched_prefix[1] j += max(good_suffix_shift, bad_char_shift) break - if j > n-m: - break + # if j > n-m: + # break +print(f"It found {occurence} occurences.") + +print(f"\n {list(range(m))}") +for i, a in enumerate(R): + print(chr(i+97), a)
\ No newline at end of file |
