diff options
| author | akiyamn | 2021-03-23 13:55:14 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-23 13:55:14 +1100 |
| commit | a7d2f9a2cbaf5b4fc2135a37d80481105359c867 (patch) | |
| tree | 0d525ee45782e73bb2537cd424ff839295abe4cc | |
| parent | 0dfa5353c274deb9aa455475965cf060e8ce4bd9 (diff) | |
| download | fit3155-a7d2f9a2cbaf5b4fc2135a37d80481105359c867.tar.gz fit3155-a7d2f9a2cbaf5b4fc2135a37d80481105359c867.zip | |
Boyer-Moore bad char rule
| -rw-r--r-- | ass1/binary_boyermoore.py | 27 | ||||
| -rw-r--r-- | ass1/editdist.py | 31 | ||||
| -rw-r--r-- | ass1/output_editdist.txt | 1 | ||||
| -rw-r--r-- | ass1/txtfile.txt | 2 |
4 files changed, 54 insertions, 7 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py new file mode 100644 index 0000000..1d60f52 --- /dev/null +++ b/ass1/binary_boyermoore.py @@ -0,0 +1,27 @@ +txt = "xpbctbxabacbxtbpqa" +pat = "tbapxab" +m = len(pat) +n = len(txt) +R = [m for _ in range(0, 26)] + +def alpha_number(char): + return ord(char) - 97 + +for i in range(m): + R[alpha_number(pat[i])] = i + +j = 0 +while j < n-m: + for i in range(m-1, -1, -1): + print(j) + match = pat[i] == txt[j+i] + if match: + pass + # print(i) + else: + mismatched = txt[j+i] + shift = i-R[alpha_number(mismatched)] + j += max(1, shift) + if j > n-m: + break +print(R)
\ No newline at end of file diff --git a/ass1/editdist.py b/ass1/editdist.py index ae5b0d1..c00fb69 100644 --- a/ass1/editdist.py +++ b/ass1/editdist.py @@ -51,24 +51,45 @@ def comapre_zs(z_prefix, z_suffix, pat_length): distance = None if z_prefix[i] == pat_length: distance = 0 - elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1: + elif z_prefix[i] + z_suffix[i] >= pat_length-1: distance = 1 if distance is not None: file.write(f"{i-pat_length-1} {distance}\n") print(f"===={i-pat_length-1} {distance}") - print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1]) - + print(i-pat_length-1, z_prefix[i], z_suffix[i]) + +def match(z_prefix, z_suffix, pat_length): + matches = [] + l = pat_length+1 + r = len(z_prefix)-1 + while l < len(z_prefix): # while l hasn't reached the end + if z_prefix[l] != 0 and z_suffix[r] != 0: + distance = None + if z_prefix[l] == pat_length: + distance = 0 + elif z_prefix[l] + z_suffix[r] >= pat_length - 1: + distance = 1 + if distance is not None: + matches.append((l-pat_length-1, distance)) + l += 1 + r += 1 + if z_prefix[l] == 0: + l += 1 + if z_suffix[r] == 0: + r -= 1 + return matches def edit_distance(txt, pat): forwards = pat + "$" + txt - backwards = reverse(pat) + "$" + txt + backwards = reverse(pat) + "$" + reverse(txt) print(list(forwards)) print(list(backwards)) print(gusfield(forwards)) print(gusfield(backwards)) print("="*15) - comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + # comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + print(match(gusfield(forwards), gusfield(backwards), len(pat))) def read_args(): with open(sys.argv[1], "r") as txt_file: diff --git a/ass1/output_editdist.txt b/ass1/output_editdist.txt index 6e8183b..e69de29 100644 --- a/ass1/output_editdist.txt +++ b/ass1/output_editdist.txt @@ -1 +0,0 @@ -0 1 diff --git a/ass1/txtfile.txt b/ass1/txtfile.txt index 9414491..d3bda8f 100644 --- a/ass1/txtfile.txt +++ b/ass1/txtfile.txt @@ -1 +1 @@ -yyybcdyyy
\ No newline at end of file +abxcdyabcdyacdyadbyyy
\ No newline at end of file |
