diff options
| author | akiyamn | 2021-03-23 16:26:18 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-23 16:26:18 +1100 |
| commit | de34b185172d1ce1b72400e9806600e3cd795f1b (patch) | |
| tree | d6418d08b991237df4d90922e50dcb366e36b65a /ass1/binary_boyermoore.py | |
| parent | a7d2f9a2cbaf5b4fc2135a37d80481105359c867 (diff) | |
| download | fit3155-de34b185172d1ce1b72400e9806600e3cd795f1b.tar.gz fit3155-de34b185172d1ce1b72400e9806600e3cd795f1b.zip | |
boyer-moore minus galil
Diffstat (limited to 'ass1/binary_boyermoore.py')
| -rw-r--r-- | ass1/binary_boyermoore.py | 84 |
1 files changed, 78 insertions, 6 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index 1d60f52..d9b813d 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -1,27 +1,99 @@ -txt = "xpbctbxabacbxtbpqa" -pat = "tbapxab" +txt = "zzzzzzabcdzzzzzz" +# pat = "tbapxab" +pat = "abcd" m = len(pat) n = len(txt) R = [m for _ in range(0, 26)] +good_suffix = [0 for _ in range(0, m+1)] def alpha_number(char): return ord(char) - 97 + +def reverse(string): + return string[::-1] + + +def compare(string, i, end): + for j in range(end): + if i+j == end or string[i+j] != string[j]: + return j + + +def gusfield(string): + z = [0 for _ in string] + z[0] = len(string) + r = 0 + l = 0 + for i in range(1, len(string)): + if i == 1: # base case + z[1] = compare(string, i, len(string)) + if z[1] > 0: + r = z[1] + 1 + l = 1 + elif i > r: # Case 1 + z[i] = compare(string, i, len(string)) + if z[i] > 0: + q = i + z[i] + r = q - 1 + l = i + elif i <= r: # Case 2 + if z[i-l] < r-i: # Case 2a + z[i] = z[i-l] + else: # Case 2b + q = compare(string, i, len(string)) + z[i] = q + r = q + l = i + return z + + for i in range(m): R[alpha_number(pat[i])] = i +Z = reverse(gusfield(reverse(pat)))+[0] +print(list(pat)) +print(Z) + +for i in range(m): + j = m - Z[i] + good_suffix[j] = i + +print(good_suffix) + +matched_prefix = gusfield(pat)+[0] +for i in range(m-1, -1, -1): + matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1]) + +print(matched_prefix) + j = 0 while j < n-m: + matches = 0 for i in range(m-1, -1, -1): print(j) match = pat[i] == txt[j+i] if match: - pass + matches += 1 + if matches == m: + print(f"Match at {j}") + good_suffix_shift = m - matched_prefix[1] + j += good_suffix_shift + i = m-1 + break # print(i) else: mismatched = txt[j+i] - shift = i-R[alpha_number(mismatched)] - j += max(1, shift) + bad_char_shift = i-R[alpha_number(mismatched)] + good_suffix_shift = 1 + if good_suffix[i+1] > 0: + good_suffix_shift = m - good_suffix[i+1]-1 + elif good_suffix[i+1] == 0: + good_suffix_shift = m - matched_prefix[i+1] + elif matches == m: + print(f"Match at {j}") + good_suffix_shift = m - matched_prefix[1] + j += max(good_suffix_shift, bad_char_shift) if j > n-m: break -print(R)
\ No newline at end of file +print(R) |
