diff options
| author | akiyamn | 2021-03-23 13:55:14 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-23 13:55:14 +1100 |
| commit | a7d2f9a2cbaf5b4fc2135a37d80481105359c867 (patch) | |
| tree | 0d525ee45782e73bb2537cd424ff839295abe4cc /ass1/editdist.py | |
| parent | 0dfa5353c274deb9aa455475965cf060e8ce4bd9 (diff) | |
| download | fit3155-a7d2f9a2cbaf5b4fc2135a37d80481105359c867.tar.gz fit3155-a7d2f9a2cbaf5b4fc2135a37d80481105359c867.zip | |
Boyer-Moore bad char rule
Diffstat (limited to 'ass1/editdist.py')
| -rw-r--r-- | ass1/editdist.py | 31 |
1 files changed, 26 insertions, 5 deletions
diff --git a/ass1/editdist.py b/ass1/editdist.py index ae5b0d1..c00fb69 100644 --- a/ass1/editdist.py +++ b/ass1/editdist.py @@ -51,24 +51,45 @@ def comapre_zs(z_prefix, z_suffix, pat_length): distance = None if z_prefix[i] == pat_length: distance = 0 - elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1: + elif z_prefix[i] + z_suffix[i] >= pat_length-1: distance = 1 if distance is not None: file.write(f"{i-pat_length-1} {distance}\n") print(f"===={i-pat_length-1} {distance}") - print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1]) - + print(i-pat_length-1, z_prefix[i], z_suffix[i]) + +def match(z_prefix, z_suffix, pat_length): + matches = [] + l = pat_length+1 + r = len(z_prefix)-1 + while l < len(z_prefix): # while l hasn't reached the end + if z_prefix[l] != 0 and z_suffix[r] != 0: + distance = None + if z_prefix[l] == pat_length: + distance = 0 + elif z_prefix[l] + z_suffix[r] >= pat_length - 1: + distance = 1 + if distance is not None: + matches.append((l-pat_length-1, distance)) + l += 1 + r += 1 + if z_prefix[l] == 0: + l += 1 + if z_suffix[r] == 0: + r -= 1 + return matches def edit_distance(txt, pat): forwards = pat + "$" + txt - backwards = reverse(pat) + "$" + txt + backwards = reverse(pat) + "$" + reverse(txt) print(list(forwards)) print(list(backwards)) print(gusfield(forwards)) print(gusfield(backwards)) print("="*15) - comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + # comapre_zs(gusfield(forwards), gusfield(backwards), len(pat)) + print(match(gusfield(forwards), gusfield(backwards), len(pat))) def read_args(): with open(sys.argv[1], "r") as txt_file: |
