aboutsummaryrefslogtreecommitdiff
path: root/ass1
diff options
context:
space:
mode:
authorakiyamn2021-03-25 20:29:06 +1100
committerakiyamn2021-03-25 20:29:06 +1100
commit61e3ed968cc6fccc7dfa749f6a86ef3175917252 (patch)
treec7eac145088379d7c01c7ea884a259deb874d1aa /ass1
parentd8ea8904f9e29dbefc044ac9ce5a16686cd0c78c (diff)
downloadfit3155-61e3ed968cc6fccc7dfa749f6a86ef3175917252.tar.gz
fit3155-61e3ed968cc6fccc7dfa749f6a86ef3175917252.zip
BM optimisations to generalisations and file reading
Diffstat (limited to 'ass1')
-rw-r--r--ass1/binary_boyermoore.py52
1 file changed, 36 insertions, 16 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index 7888017..e826016 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -5,6 +5,7 @@
# R = [[m for __ in range(m)] for _ in range(0, 26)]
# good_suffix = [0 for _ in range(0, m+1)]
+import sys
def alpha_number(char):
if char == "0" or char == "1":
@@ -27,7 +28,7 @@ def condense(binary, offset=0, size=2):
for i in range(offset, len(binary)-offset, size):
slice = binary[i:i+size]
if len(slice) == size:
- out += chr(97 + int(slice, size))
+ out += chr(97 + int(slice, 2))
return out
@@ -110,18 +111,18 @@ def boyer_moore(pat, txt):
galil_index = -1
comps = 0
- print("="*15)
- print(6*" " + txt)
+ # print("="*15)
+ # print(6*" " + txt)
i = m-1
while j <= n-m:
- print(f"{j=:02} {' ' * j}", end="")
- for x in range(len(pat)):
- if x == i:
- print(pat[x].upper(), end="")
- else:
- print(pat[x], end="")
- print()
+ # print(f"{j=:02} {' ' * j}", end="")
+ # for x in range(len(pat)):
+ # if x == i:
+ # print(pat[x].upper(), end="")
+ # else:
+ # print(pat[x], end="")
+ # print()
match = pat[i] == txt[j+i]
comps += 1
@@ -155,13 +156,17 @@ def boyer_moore(pat, txt):
print(f"{comps} comparisons.")
return comps, occurrence
+def two_to_the(n):
+ return 1 << n
+
def chunky_search(pat, txt, factor=2):
occurrence = 0
comps = 0
- for offset in range(1 << factor-1):
- padding = format(offset, f"0{factor-1}b")
+ for offset in range(two_to_the(factor-1)):
+ padding = format(offset, f"0{factor-1}b") if len(pat) % factor else ""
augmented_pat = f"{pat}{padding}"
- print(padding)
+ # print(padding)
+ print(condense(format(two_to_the(factor)-1, f"0b"), 0, factor))
c, o = boyer_moore(condense(augmented_pat, 0, factor), condense(txt, offset, factor))
comps += c
occurrence += o
@@ -170,14 +175,29 @@ def chunky_search(pat, txt, factor=2):
print(f"Chunky Optimisation: {occurrence} occurences in {comps} comparisons.")
print(f"Normal: {base_occur} occurences in {base_comps} comparisons.")
# assert base_occur == occurrence
- print(f"{comps*100/base_comps:.2f}% of normal Boyer-Moore")
+ if base_comps > 0:
+ print(f"{comps * 100 / base_comps:.3f}% of normal Boyer-Moore")
+ print(f"{comps * 100 / 642096:.3f}% of their Boyer-Moore")
+
+def read_args():
+ with open(sys.argv[1], "r") as txt_file:
+ txt = txt_file.read()
+ with open(sys.argv[2], "r") as pat_file:
+ pat = pat_file.read()
+ return txt, pat
# boyer_moore(condense("01100010"), condense(a))
# boyer_moore(condense("01100011"), condense(a, offset=1))
-F = 3
-chunky_search("100100100100100", "1111001100101011001110110001101101101011110101101101010101111111111011100110110000001110100100011110", factor=F)
+def main():
+ F = 3
+ if len(sys.argv) < 3:
+ print("Not enough arguments!")
+ else:
+ txt, pat = read_args()
+ chunky_search(pat, txt, factor=F)
+main()
# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
# print(condense("1110000110"))
# print(condense("1110000110", offset=1))