diff options
| author | akiyamn | 2021-03-25 15:31:51 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-25 15:31:51 +1100 |
| commit | eebe71f99993fee680df317f36a3948f954c8e49 (patch) | |
| tree | a3e0c49ab46b4d1948e1b4ac8a7ff8e838daba81 /ass1/binary_boyermoore.py | |
| parent | 20d2ef74154723613ee8801b5f89aa108ca569b0 (diff) | |
| download | fit3155-eebe71f99993fee680df317f36a3948f954c8e49.tar.gz fit3155-eebe71f99993fee680df317f36a3948f954c8e49.zip | |
Boyer-Moore to binary
Diffstat (limited to 'ass1/binary_boyermoore.py')
| -rw-r--r-- | ass1/binary_boyermoore.py | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index b42cff7..aa5d0c5 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -6,7 +6,7 @@ # good_suffix = [0 for _ in range(0, m+1)] def alpha_number(char): - return ord(char) - 97 + return int(char) def reverse(string): @@ -48,9 +48,9 @@ def gusfield(string): def gen_jump_table(pat): m = len(pat) - R = [[-1 for __ in range(m)] for _ in range(0, 26)] + R = [[-1 for __ in range(m)] for _ in range(0, 2)] for j in range(m): - for i in range(j): + for i in range(j+1): R[alpha_number(pat[i])][j] = i return R @@ -95,6 +95,7 @@ def boyer_moore(pat, txt): start = None stop = None galil_index = -1 + comps = 0 print("="*15) print(6*" " + txt) @@ -115,6 +116,7 @@ def boyer_moore(pat, txt): print(pat[x], end="") print() match = pat[i] == txt[j+i] + comps += 1 if match: matches += 1 if matches == m: @@ -126,6 +128,7 @@ def boyer_moore(pat, txt): else: mismatched = txt[j+i] bad_char_shift = i-R[alpha_number(mismatched)][i] + good_suffix_shift = 1 if good_suffix[i+1] > 0: good_suffix_shift = m - good_suffix[i+1] @@ -142,8 +145,12 @@ def boyer_moore(pat, txt): good_suffix_shift = m - matched_prefix[1] # print(start, stop) j += max(good_suffix_shift, bad_char_shift) + # print(good_suffix_shift, bad_char_shift) if good_suffix_shift >= bad_char_shift: print("good suff", good_suffix_shift) + else: + print("bad char", bad_char_shift) + print(i) if good_suffix[i+1] >= 0 and good_suffix_shift >= bad_char_shift: galil_index = j break @@ -152,5 +159,6 @@ def boyer_moore(pat, txt): print(f"\n {list(range(m))}") for i, a in enumerate(R): print(chr(i+97), a) + print(f"{comps} comparisons.") -boyer_moore("xoxxxo", "xooxxoxoxoxoxoxoxxxoxxoxxxoxxoooxxxoxxxoxxxox")
\ No newline at end of file +boyer_moore("111101111111111", "101010111010010101110111010101110100101011101101111011111111111101110111010101110100101011101101101110111010101110100101011101101101110110000110101011101001010111011011011101100001010101110100101011101110101011101001010111011011011101110101011101001010111011011011101110101011101001010111011011011101100001101010111010010101110110110111011000010111011111011101110110101110110101101100000001011001010101010101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010100001011101111101110111011010111011010110110000000101100101010101010101011101001010111011011011101100001011101111101110111011010101110100101011101101101110110000101110111110101010111010010101110110110111011000010111011101010111010010101110110110111011000010111010101110100101011101101101010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101111011000010111011111011101110110101110110101101100000001011001010101010110111110111011101101011101101011011000000010110010101010101111011101110110101110110101101100000001011001010101010111101110110101110110101101100000001011001010101010110101110110101101100000001011001010101010110110111011000010101011101001010111011101010111010010101110110110111011101010111010010101110110110111011101010111010010101110110110111011000011010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111101111111111111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101")
\ No newline at end of file |
