diff options
| -rw-r--r-- | ass1/binary_boyermoore.py | 32 | 
1 files changed, 24 insertions, 8 deletions
| diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index 42d7d01..a75b9b2 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -1,5 +1,5 @@ -# txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab" -# pat = "baab" +# txt = "zzzzzzzzzabczzzzzzzzzz" +# pat = "abczzzabc"  # m = len(pat)  # n = len(txt)  # R = [[m for __ in range(m)] for _ in range(0, 26)] @@ -48,7 +48,7 @@ def gusfield(string):  def gen_jump_table(pat):      m = len(pat) -    R = [[m for __ in range(m)] for _ in range(0, 26)] +    R = [[-1 for __ in range(m)] for _ in range(0, 26)]      for j in range(m):          for i in range(j):              R[alpha_number(pat[i])][j] = i @@ -92,14 +92,22 @@ def boyer_moore(pat, txt):      n = len(txt)      j = 0      occurrence = 0 +    start = None +    stop = None +    galil_index = -1      print("="*15) -    print(5*" " + txt) +    print(6*" " + txt)      while j <= n-m:          matches = 0          for i in range(m-1, -1, -1): -            print(f"{j=}  {' ' * j}", end="") +            # if galil_index == j and start <= i < stop: +            #     print("pee") +            #     i = max(start + 1, 1) +            #     continue + +            print(f"{j=:02}  {' ' * j}", end="")              for x in range(len(pat)):                  if x == i:                      print(pat[x].upper(), end="") @@ -117,16 +125,25 @@ def boyer_moore(pat, txt):                      break              else:                  mismatched = txt[j+i] -                bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R +                bad_char_shift = i-R[alpha_number(mismatched)][i]                  good_suffix_shift = 1                  if good_suffix[i+1] > 0:                      good_suffix_shift = m - good_suffix[i+1] +                    start = good_suffix[i+1] - m + i +                    stop = good_suffix[i+1] +                    next_galil = j+1                  elif good_suffix[i+1] == 0:                      good_suffix_shift = m - matched_prefix[i+1] +                    start = 1 +                    stop = matched_prefix[i+1] +                    next_galil = j+1                  elif matches == m:                      print(f"Match at {j}")                      good_suffix_shift = m - matched_prefix[1] +                # print(start, stop)                  j += max(good_suffix_shift, bad_char_shift) +                if good_suffix[i+1] >= 0 and good_suffix_shift >= bad_char_shift: +                    galil_index = j                  break      print(f"It found {occurrence} occurences.") @@ -134,5 +151,4 @@ def boyer_moore(pat, txt):      for i, a in enumerate(R):          print(chr(i+97), a) -pat = "abbabba" -boyer_moore(pat, "zzzzzzzzzzzzzzzabbazzzzzzzzzzzzzzzzzzzzz")
\ No newline at end of file +boyer_moore("abcdxxxabcd", "zzzzzzabcdxxxaaabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdabcdxxxabcdabcdxxxabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdabcdxxxabcdbcdabcdxxxabcdzzzzabcdxxxabcdabcdxxxabcdzabcdxxxabcdzzzzzzzzzz")
\ No newline at end of file | 
