aboutsummaryrefslogtreecommitdiff
path: root/ass1
diff options
context:
space:
mode:
authorakiyamn2021-03-25 20:29:06 +1100
committerakiyamn2021-03-25 20:29:06 +1100
commit61e3ed968cc6fccc7dfa749f6a86ef3175917252 (patch)
treec7eac145088379d7c01c7ea884a259deb874d1aa /ass1
parentd8ea8904f9e29dbefc044ac9ce5a16686cd0c78c (diff)
downloadfit3155-61e3ed968cc6fccc7dfa749f6a86ef3175917252.tar.gz
fit3155-61e3ed968cc6fccc7dfa749f6a86ef3175917252.zip
BM optimisations to generalisations and file reading
Diffstat (limited to 'ass1')
-rw-r--r--ass1/binary_boyermoore.py52
1 file changed, 36 insertions, 16 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index 7888017..e826016 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -5,6 +5,7 @@
# R = [[m for __ in range(m)] for _ in range(0, 26)]
# good_suffix = [0 for _ in range(0, m+1)]
+import sys
def alpha_number(char):
if char == "0" or char == "1":
@@ -27,7 +28,7 @@ def condense(binary, offset=0, size=2):
for i in range(offset, len(binary)-offset, size):
slice = binary[i:i+size]
if len(slice) == size:
- out += chr(97 + int(slice, size))
+ out += chr(97 + int(slice, 2))
return out
@@ -110,18 +111,18 @@ def boyer_moore(pat, txt):
galil_index = -1
comps = 0
- print("="*15)
- print(6*" " + txt)
+ # print("="*15)
+ # print(6*" " + txt)
i = m-1
while j <= n-m:
- print(f"{j=:02} {' ' * j}", end="")
- for x in range(len(pat)):
- if x == i:
- print(pat[x].upper(), end="")
- else:
- print(pat[x], end="")
- print()
+ # print(f"{j=:02} {' ' * j}", end="")
+ # for x in range(len(pat)):
+ # if x == i:
+ # print(pat[x].upper(), end="")
+ # else:
+ # print(pat[x], end="")
+ # print()
match = pat[i] == txt[j+i]
comps += 1
@@ -155,13 +156,17 @@ def boyer_moore(pat, txt):
print(f"{comps} comparisons.")
return comps, occurrence
+def two_to_the(n):
+ return 1 << n
+
def chunky_search(pat, txt, factor=2):
occurrence = 0
comps = 0
- for offset in range(1 << factor-1):
- padding = format(offset, f"0{factor-1}b")
+ for offset in range(two_to_the(factor-1)):
+ padding = format(offset, f"0{factor-1}b") if len(pat) % factor else ""
augmented_pat = f"{pat}{padding}"
- print(padding)
+ # print(padding)
+ print(condense(format(two_to_the(factor)-1, f"0b"), 0, factor))
c, o = boyer_moore(condense(augmented_pat, 0, factor), condense(txt, offset, factor))
comps += c
occurrence += o
@@ -170,14 +175,29 @@ def chunky_search(pat, txt, factor=2):
print(f"Chunky Optimisation: {occurrence} occurences in {comps} comparisons.")
print(f"Normal: {base_occur} occurences in {base_comps} comparisons.")
# assert base_occur == occurrence
- print(f"{comps*100/base_comps:.2f}% of normal Boyer-Moore")
+ if base_comps > 0:
+ print(f"{comps * 100 / base_comps:.3f}% of normal Boyer-Moore")
+ print(f"{comps * 100 / 642096:.3f}% of their Boyer-Moore")
+
+def read_args():
+ with open(sys.argv[1], "r") as txt_file:
+ txt = txt_file.read()
+ with open(sys.argv[2], "r") as pat_file:
+ pat = pat_file.read()
+ return txt, pat
# boyer_moore(condense("01100010"), condense(a))
# boyer_moore(condense("01100011"), condense(a, offset=1))
-F = 3
-chunky_search("100100100100100", "1111001100101011001110110001101101101011110101101101010101111111111011100110110000001110100100011110", factor=F)
+def main():
+ F = 3
+ if len(sys.argv) < 3:
+ print("Not enough arguments!")
+ else:
+ txt, pat = read_args()
+ chunky_search(pat, txt, factor=F)
+main()
# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
# print(condense("1110000110"))
# print(condense("1110000110", offset=1))