diff options
| author | akiyamn | 2021-03-26 17:44:22 +1100 | 
|---|---|---|
| committer | akiyamn | 2021-03-26 17:44:22 +1100 | 
| commit | de0876157c4271a2ff271117ebd76bcd8d94d6da (patch) | |
| tree | 4c1dc157f25dc719fbf23ca059bb34c3209cf865 /ass1/binary_boyermoore.py | |
| parent | 61e3ed968cc6fccc7dfa749f6a86ef3175917252 (diff) | |
| download | fit3155-de0876157c4271a2ff271117ebd76bcd8d94d6da.tar.gz fit3155-de0876157c4271a2ff271117ebd76bcd8d94d6da.zip | |
BM: I think Galil's is workingbm_bytes
Diffstat (limited to 'ass1/binary_boyermoore.py')
| -rw-r--r-- | ass1/binary_boyermoore.py | 36 | 
1 files changed, 29 insertions, 7 deletions
| diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index e826016..380eacf 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -100,21 +100,23 @@ def preprocess(pat):      matched_prefix = gen_matched_prefix(pat)      return R, good_suffix, matched_prefix + +  def boyer_moore(pat, txt):      R, good_suffix, matched_prefix = preprocess(pat)      m = len(pat)      n = len(txt)      j = 0      occurrence = 0 -    start = None -    stop = None -    galil_index = -1 +    galils = 0      comps = 0      # print("="*15)      # print(6*" " + txt)      i = m-1 - +    galil = False +    start = None +    stop = None      while j <= n-m:          # print(f"{j=:02}  {' ' * j}", end="")          # for x in range(len(pat)): @@ -123,10 +125,14 @@ def boyer_moore(pat, txt):          #     else:          #         print(pat[x], end="")          # print() -          match = pat[i] == txt[j+i]          comps += 1          if match: +            if galil and stop >= i > start: +                galils += 1 +                assert start >= 0 +                i = max(start-1, 0) +                galil = False              if i == 0:                  good_suffix_shift = m - matched_prefix[1]                  j += good_suffix_shift @@ -135,18 +141,30 @@ def boyer_moore(pat, txt):              else:                  i -= 1          else: +            galil = False +            mp = False +            gs = False              mismatched = txt[j + i]              bad_char_shift = i - R[alpha_number(mismatched)][i]              good_suffix_shift = 1              if good_suffix[i+1] > 0:                  good_suffix_shift = m - good_suffix[i+1] +                gs = True +                start = good_suffix[i+1] - m + i + 1 +                stop = good_suffix[i+1]              elif good_suffix[i+1] == 0:                  good_suffix_shift = m - matched_prefix[i+1] -            j += max(good_suffix_shift, bad_char_shift) +                mp = True +                start = 0 +                stop = matched_prefix[i + 1] +            best_shift = max(good_suffix_shift, bad_char_shift) +            j += best_shift +            galil = (mp or gs) and best_shift == good_suffix_shift              i = m-1      print(f"It found {occurrence} occurences.") +    print(f"Galil'd {galils} times.")      # print(f"\n  {list(range(m))}")      # print("" + str(list(map(int, pat))))      # for i, a in enumerate(R): @@ -190,7 +208,7 @@ def read_args():  # boyer_moore(condense("01100011"), condense(a, offset=1))  def main(): -    F = 3 +    F = 2      if len(sys.argv) < 3:          print("Not enough arguments!")      else: @@ -198,6 +216,10 @@ def main():          chunky_search(pat, txt, factor=F)  main() + + + +  # boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")  # print(condense("1110000110"))  # print(condense("1110000110", offset=1)) | 
