aboutsummaryrefslogtreecommitdiff
path: root/ass1/binary_boyermoore.py
diff options
context:
space:
mode:
author: akiyamn, 2021-03-25 19:10:53 +1100
committer: akiyamn, 2021-03-25 19:10:53 +1100
commit: 773c48d4069ef65dd8be6d61247f633679b58be0 (patch)
tree: 45d6612f75d60a0422b925cd746f8f2a5f8f9ce9 /ass1/binary_boyermoore.py
parent: 983d590ce17f9dd95319b0e15289b13016d35c58 (diff)
downloadfit3155-773c48d4069ef65dd8be6d61247f633679b58be0.tar.gz
fit3155-773c48d4069ef65dd8be6d61247f633679b58be0.zip
BM: chunky method works i think
Diffstat (limited to 'ass1/binary_boyermoore.py')
-rw-r--r-- ass1/binary_boyermoore.py 61
1 files changed, 53 insertions, 8 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index f226f6b..ebce071 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -6,7 +6,10 @@
# good_suffix = [0 for _ in range(0, m+1)]
def alpha_number(char):
- return int(char)
+ if char == "0" or char == "1":
+ return int(char)
+ return ord(char) - 97
+ # return int(char)
def reverse(string):
@@ -18,6 +21,15 @@ def compare(string, i, end):
if i+j == end or string[i+j] != string[j]:
return j
+def condense(binary, offset=0, size=2):
+    """Pack a string of binary digits into one lowercase letter per chunk.
+
+    Starting at index ``offset``, ``binary`` is cut into consecutive chunks
+    of ``size`` characters. Each chunk is read as a base-2 number and
+    emitted as ``chr(97 + value)``; with ``size=2`` the chunks "00", "01",
+    "10" and "11" become "a", "b", "c" and "d". Characters before
+    ``offset`` and a trailing partial chunk are dropped.
+
+    Note: ``alpha_number`` maps letters back with ``ord(char) - 97`` and
+    ``gen_jump_table`` allocates 26 rows, so only ``size <= 4`` (at most
+    16 distinct letters) stays inside that table.
+
+    Args:
+        binary: String made up of "0" and "1" characters.
+        offset: Index of the first character of the first chunk.
+        size: Number of binary digits folded into each output letter.
+
+    Returns:
+        The condensed string; empty when no full chunk fits.
+
+    Raises:
+        ValueError: If a chunk holds a character other than "0"/"1", or
+            if ``size`` is 0 (``range`` rejects a zero step).
+    """
+    # Stopping at len(binary) - size + 1 yields only the starts of chunks
+    # that fit completely, so no separate length check is needed.
+    starts = range(offset, len(binary) - size + 1, size)
+    # Each chunk is `size` digits wide but always base 2: the chunk width
+    # must not double as the radix.
+    return "".join(chr(97 + int(binary[i:i + size], 2)) for i in starts)
+
+
def gusfield(string):
z = [0 for _ in string]
@@ -48,7 +60,7 @@ def gusfield(string):
def gen_jump_table(pat):
m = len(pat)
- R = [[-1 for __ in range(m)] for _ in range(0, 2)]
+ R = [[-1 for __ in range(m)] for _ in range(0, 26)]
for j in range(m):
for i in range(j+1):
R[alpha_number(pat[i])][j] = i
@@ -98,10 +110,18 @@ def boyer_moore(pat, txt):
comps = 0
print("="*15)
- # print(6*" " + txt)
+ print(6*" " + txt)
i = m-1
while j <= n-m:
+ print(f"{j=:02} {' ' * j}", end="")
+ for x in range(len(pat)):
+ if x == i:
+ print(pat[x].upper(), end="")
+ else:
+ print(pat[x], end="")
+ print()
+
match = pat[i] == txt[j+i]
comps += 1
if match:
@@ -113,22 +133,47 @@ def boyer_moore(pat, txt):
else:
i -= 1
else:
+ mismatched = txt[j + i]
+ bad_char_shift = i - R[alpha_number(mismatched)][i]
good_suffix_shift = 1
if good_suffix[i+1] > 0:
good_suffix_shift = m - good_suffix[i+1]
elif good_suffix[i+1] == 0:
good_suffix_shift = m - matched_prefix[i+1]
- j += good_suffix_shift
+ j += max(good_suffix_shift, bad_char_shift)
i = m-1
+
print(f"It found {occurrence} occurences.")
# print(f"\n {list(range(m))}")
- print("" + str(list(map(int, pat))))
+ # print("" + str(list(map(int, pat))))
# for i, a in enumerate(R):
# print(chr(i+97), a)
- print(good_suffix)
- print(matched_prefix)
+ # print(good_suffix)
+ # print(matched_prefix)
print(f"{comps} comparisons.")
+ return comps, occurrence
-boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
+def chunky_search(pat, txt, factor=2):
+    """Compare Boyer-Moore on condensed strings against the plain search.
+
+    For every ``offset`` in ``range(factor)`` the pattern (with
+    ``str(offset)`` appended) and the text (read from ``offset``) are
+    packed by ``condense`` into letters of ``factor`` bits each and searched
+    with ``boyer_moore``; the comparison and occurrence counts of those
+    runs are summed. ``boyer_moore(pat, txt)`` is then run on the raw
+    strings as a baseline and both totals are printed.
+
+    Args:
+        pat: Binary pattern string.
+        txt: Binary text string to search.
+        factor: Bits per condensed letter, and the number of text
+            alignments tried.
+
+    Returns:
+        None. Results are reported with ``print`` only.
+    """
+    occurrence = 0
+    comps = 0
+    for offset in range(factor):
+        # NOTE(review): the digit appended to the pattern is the alignment
+        # offset itself; presumably it pads an odd-length pattern to a whole
+        # number of chunks -- confirm. For factor > 2, str(offset) is no
+        # longer a binary digit.
+        c, o = boyer_moore(condense(pat + str(offset), 0, factor), condense(txt, offset, factor))
+        comps += c
+        occurrence += o
+    base_comps, base_occur = boyer_moore(pat, txt)
+    print("*"*20)
+    print(f"Chunky Optimisation: {occurrence} occurences in {comps} comparisons.")
+    print(f"Normal: {base_occur} occurences in {base_comps} comparisons.")
+    # assert base_occur == occurrence
+    if base_comps:
+        print(f"{comps*100/base_comps:.2f}% of normal Boyer-Moore")
+    else:
+        # The baseline reports 0 comparisons when its search loop never
+        # runs; a ratio against zero is undefined, so say so instead of
+        # raising ZeroDivisionError.
+        print("n/a% of normal Boyer-Moore (baseline made 0 comparisons)")
+
+# boyer_moore(condense("01100010"), condense(a))
+# boyer_moore(condense("01100011"), condense(a, offset=1))
+
+chunky_search("1100011", "1111001100101011001110110001101101101011110101101101010101111111111011100110110000001110100100011110")
+
+# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
+# print(condense("1110000110"))
+# print(condense("1110000110", offset=1))
# boyer_moore("111011011001110", "1010101110100101011101110101011101001010111011011110111111111111011101110101011101001010111011011011101110101011101001010111011011011101111101101100111000001101010111010010101110110110111011000010101011101001010111011101010111010010101110110110111011101010111010010101110110110111011101010111010010101110110110111011000011010101110100101011101101101110110000101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101010111010010101110111010101110100101011101101101110111010101110100101011101101101110111010101110100101011101101101110110000110101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101010111011111011101110110101110110101101100000001011001010101010100001011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101010101110100101011101101101110110000101110111110111011101101010111010010101110110110111011000010111011111010101011101001010111011011011101100001011101110101011101001010111011011011101100001011101010111010010101110110110101010111010010101110110110111011000010111011111011101110110101110110101101100000001011001010
10101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101101110111110111011101101011101101011011000000010110010101010101011101111101110111011010111011010110110000000101100101010101010000101110111110111011101101011101101011011000000010110010101010101000010111011111011101110110101110110101101100000001011001010101010101010111101111111111111010010101110110110111011000010111011111011101110110101011101001010111011011011101100001011101111101010101110100101011101101101110110000101110111010101110100101011101101101110110000101110101011101001010111011011010101011101001010111011011011101100001011101111101110111011010111011010110110000000101100101010101011110110000101110111110111011101101011101101011011000000010110010101010101101111101110111011010111011010110110000000101100101010101011110111011101101011101101011011000000010110010101010101111011101101011101101011011000000010110010101010101101011101101011011000000010110010101010101101101110110000101110111110111011101101011101101011011000000010110010101010101") \ No newline at end of file