diff options
| author | akiyamn | 2021-03-24 20:22:45 +1100 | 
|---|---|---|
| committer | akiyamn | 2021-03-24 20:22:45 +1100 | 
| commit | 556fe538ddd5580f966d31d3167ecc764434fcb9 (patch) | |
| tree | 3d456c9ed2cbe5fee454211a11ca126470b4ae6f | |
| parent | 5a14cf3b85939838369123aca555bb9f9d9f80b1 (diff) | |
| download | fit3155-556fe538ddd5580f966d31d3167ecc764434fcb9.tar.gz fit3155-556fe538ddd5580f966d31d3167ecc764434fcb9.zip | |
Clean boyer-moore
| -rw-r--r-- | ass1/binary_boyermoore.py | 166 | 
1 files changed, 93 insertions, 73 deletions
| diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index 0c48b78..42d7d01 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -1,9 +1,9 @@ -txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab" -pat = "baab" -m = len(pat) -n = len(txt) -R = [[m for __ in range(m)] for _ in range(0, 26)] -good_suffix = [0 for _ in range(0, m+1)] +# txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab" +# pat = "baab" +# m = len(pat) +# n = len(txt) +# R = [[m for __ in range(m)] for _ in range(0, 26)] +# good_suffix = [0 for _ in range(0, m+1)]  def alpha_number(char):      return ord(char) - 97 @@ -46,73 +46,93 @@ def gusfield(string):                  l = i      return z -for j in range(m): -    for i in range(j): -        print(pat[i], i) -        R[alpha_number(pat[i])][j] = i - - -Z = reverse(gusfield(reverse(pat)))+[0] -print(list(pat)) -print(R) -print(Z) - -for i in range(m): -    j = m - Z[i] -    good_suffix[j] = i+1 - -print("g", good_suffix) - -matched_prefix = gusfield(pat)+[0] -for i in range(m-1, -1, -1): -    matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1]) - -print(matched_prefix) - -print("="*15) - -print(5*" " + txt) - -j = 0 -occurence = 0 -while j <= n-m: -    matches = 0 +def gen_jump_table(pat): +    m = len(pat) +    R = [[m for __ in range(m)] for _ in range(0, 26)] +    for j in range(m): +        for i in range(j): +            R[alpha_number(pat[i])][j] = i +    return R + +def gen_z_suffix(pat): +    return reverse(gusfield(reverse(pat)))+[0] + +# print(list(pat)) +# print(R) +# print(Z) + +def gen_good_suffix(pat, Z): +    m = len(pat) +    good_suffix = [0 for _ in range(0, m + 1)] +    for i in range(m): +        j = m - Z[i] +        good_suffix[j] = i+1 +    return good_suffix + +# print("g", good_suffix) + +def gen_matched_prefix(pat): +    m = len(pat) +    matched_prefix = gusfield(pat)+[0]      for i in range(m-1, -1, -1): -        print(f"{j=}  {' ' * j}", end="") -        for x in range(len(pat)): -            if x == i: -                print(pat[x].upper(), end="") +        matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1]) +    return matched_prefix + + +def preprocess(pat): +    R = gen_jump_table(pat) +    Z = gen_z_suffix(pat) +    good_suffix = gen_good_suffix(pat, Z) +    matched_prefix = gen_matched_prefix(pat) +    return R, good_suffix, matched_prefix + +def boyer_moore(pat, txt): +    R, good_suffix, matched_prefix = preprocess(pat) +    m = len(pat) +    n = len(txt) +    j = 0 +    occurrence = 0 + +    print("="*15) +    print(5*" " + txt) + +    while j <= n-m: +        matches = 0 +        for i in range(m-1, -1, -1): +            print(f"{j=}  {' ' * j}", end="") +            for x in range(len(pat)): +                if x == i: +                    print(pat[x].upper(), end="") +                else: +                    print(pat[x], end="") +            print() +            match = pat[i] == txt[j+i] +            if match: +                matches += 1 +                if matches == m: +                    good_suffix_shift = m - matched_prefix[1] +                    j += good_suffix_shift +                    i = m-1 +                    occurrence += 1 +                    break              else: -                print(pat[x], end="") -        print() -        match = pat[i] == txt[j+i] -        if match: -            matches += 1 -            if matches == m: -                # print(f"    Match at {j}", end="") -                good_suffix_shift = m - matched_prefix[1] -                j += good_suffix_shift -                i = m-1 -                occurence += 1 +                mismatched = txt[j+i] +                bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R +                good_suffix_shift = 1 +                if good_suffix[i+1] > 0: +                    good_suffix_shift = m - good_suffix[i+1] +                elif good_suffix[i+1] == 0: +                    good_suffix_shift = m - matched_prefix[i+1] +                elif matches == m: +                    print(f"Match at {j}") +                    good_suffix_shift = m - matched_prefix[1] +                j += max(good_suffix_shift, bad_char_shift)                  break -            # print(i) -        else: -            mismatched = txt[j+i] -            bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R -            good_suffix_shift = 1 -            if good_suffix[i+1] > 0: -                good_suffix_shift = m - good_suffix[i+1] -            elif good_suffix[i+1] == 0: -                good_suffix_shift = m - matched_prefix[i+1] -            elif matches == m: -                print(f"Match at {j}") -                good_suffix_shift = m - matched_prefix[1] -            j += max(good_suffix_shift, bad_char_shift) -            break -            # if j > n-m: -            #     break -print(f"It found {occurence} occurences.") - -print(f"\n  {list(range(m))}") -for i, a in enumerate(R): -    print(chr(i+97), a)
\ No newline at end of file + +    print(f"It found {occurrence} occurences.") +    print(f"\n  {list(range(m))}") +    for i, a in enumerate(R): +        print(chr(i+97), a) + +pat = "abbabba" +boyer_moore(pat, "zzzzzzzzzzzzzzzabbazzzzzzzzzzzzzzzzzzzzz")
\ No newline at end of file | 
