aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorakiyamn2021-03-23 13:55:14 +1100
committerakiyamn2021-03-23 13:55:14 +1100
commita7d2f9a2cbaf5b4fc2135a37d80481105359c867 (patch)
tree0d525ee45782e73bb2537cd424ff839295abe4cc
parent0dfa5353c274deb9aa455475965cf060e8ce4bd9 (diff)
downloadfit3155-a7d2f9a2cbaf5b4fc2135a37d80481105359c867.tar.gz
fit3155-a7d2f9a2cbaf5b4fc2135a37d80481105359c867.zip
Boyer-Moore bad char rule
-rw-r--r--ass1/binary_boyermoore.py27
-rw-r--r--ass1/editdist.py31
-rw-r--r--ass1/output_editdist.txt1
-rw-r--r--ass1/txtfile.txt2
4 files changed, 54 insertions, 7 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
new file mode 100644
index 0000000..1d60f52
--- /dev/null
+++ b/ass1/binary_boyermoore.py
@@ -0,0 +1,27 @@
+txt = "xpbctbxabacbxtbpqa"
+pat = "tbapxab"
+m = len(pat)
+n = len(txt)
+R = [m for _ in range(0, 26)]
+
+def alpha_number(char):
+ return ord(char) - 97
+
+for i in range(m):
+ R[alpha_number(pat[i])] = i
+
+j = 0
+while j < n-m:
+ for i in range(m-1, -1, -1):
+ print(j)
+ match = pat[i] == txt[j+i]
+ if match:
+ pass
+ # print(i)
+ else:
+ mismatched = txt[j+i]
+ shift = i-R[alpha_number(mismatched)]
+ j += max(1, shift)
+ if j > n-m:
+ break
+print(R) \ No newline at end of file
diff --git a/ass1/editdist.py b/ass1/editdist.py
index ae5b0d1..c00fb69 100644
--- a/ass1/editdist.py
+++ b/ass1/editdist.py
@@ -51,24 +51,45 @@ def comapre_zs(z_prefix, z_suffix, pat_length):
distance = None
if z_prefix[i] == pat_length:
distance = 0
- elif z_prefix[i] + z_suffix[i+pat_length-1] >= pat_length-1:
+ elif z_prefix[i] + z_suffix[i] >= pat_length-1:
distance = 1
if distance is not None:
file.write(f"{i-pat_length-1} {distance}\n")
print(f"===={i-pat_length-1} {distance}")
- print(i-pat_length-1, z_prefix[i], z_suffix[i+pat_length-1])
-
+ print(i-pat_length-1, z_prefix[i], z_suffix[i])
+
+def match(z_prefix, z_suffix, pat_length):
+ matches = []
+ l = pat_length+1
+ r = len(z_prefix)-1
+ while l < len(z_prefix): # while l hasn't reached the end
+ if z_prefix[l] != 0 and z_suffix[r] != 0:
+ distance = None
+ if z_prefix[l] == pat_length:
+ distance = 0
+ elif z_prefix[l] + z_suffix[r] >= pat_length - 1:
+ distance = 1
+ if distance is not None:
+ matches.append((l-pat_length-1, distance))
+ l += 1
+ r += 1
+ if z_prefix[l] == 0:
+ l += 1
+ if z_suffix[r] == 0:
+ r -= 1
+ return matches
def edit_distance(txt, pat):
forwards = pat + "$" + txt
- backwards = reverse(pat) + "$" + txt
+ backwards = reverse(pat) + "$" + reverse(txt)
print(list(forwards))
print(list(backwards))
print(gusfield(forwards))
print(gusfield(backwards))
print("="*15)
- comapre_zs(gusfield(forwards), gusfield(backwards), len(pat))
+ # comapre_zs(gusfield(forwards), gusfield(backwards), len(pat))
+ print(match(gusfield(forwards), gusfield(backwards), len(pat)))
def read_args():
with open(sys.argv[1], "r") as txt_file:
diff --git a/ass1/output_editdist.txt b/ass1/output_editdist.txt
index 6e8183b..e69de29 100644
--- a/ass1/output_editdist.txt
+++ b/ass1/output_editdist.txt
@@ -1 +0,0 @@
-0 1
diff --git a/ass1/txtfile.txt b/ass1/txtfile.txt
index 9414491..d3bda8f 100644
--- a/ass1/txtfile.txt
+++ b/ass1/txtfile.txt
@@ -1 +1 @@
-yyybcdyyy \ No newline at end of file
+abxcdyabcdyacdyadbyyy \ No newline at end of file