aboutsummaryrefslogtreecommitdiff
path: root/ass1
diff options
context:
space:
mode:
Diffstat (limited to 'ass1')
-rw-r--r--ass1/binary_boyermoore.py15
1 files changed, 10 insertions, 5 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py
index ebce071..7888017 100644
--- a/ass1/binary_boyermoore.py
+++ b/ass1/binary_boyermoore.py
@@ -5,6 +5,7 @@
# R = [[m for __ in range(m)] for _ in range(0, 26)]
# good_suffix = [0 for _ in range(0, m+1)]
+
def alpha_number(char):
if char == "0" or char == "1":
return int(char)
@@ -24,7 +25,7 @@ def compare(string, i, end):
def condense(binary, offset=0, size=2):
out = ""
for i in range(offset, len(binary)-offset, size):
- slice = binary[i:i+2]
+ slice = binary[i:i+size]
if len(slice) == size:
out += chr(97 + int(slice, size))
return out
@@ -60,7 +61,7 @@ def gusfield(string):
def gen_jump_table(pat):
m = len(pat)
- R = [[-1 for __ in range(m)] for _ in range(0, 26)]
+ R = [[-1 for __ in range(m)] for _ in range(0, 256)]
for j in range(m):
for i in range(j+1):
R[alpha_number(pat[i])][j] = i
@@ -157,8 +158,11 @@ def boyer_moore(pat, txt):
def chunky_search(pat, txt, factor=2):
occurrence = 0
comps = 0
- for offset in range(factor):
- c, o = boyer_moore(condense(pat + str(offset), 0, factor), condense(txt, offset, factor))
+ for offset in range(1 << factor-1):
+ padding = format(offset, f"0{factor-1}b")
+ augmented_pat = f"{pat}{padding}"
+ print(padding)
+ c, o = boyer_moore(condense(augmented_pat, 0, factor), condense(txt, offset, factor))
comps += c
occurrence += o
base_comps, base_occur = boyer_moore(pat, txt)
@@ -171,7 +175,8 @@ def chunky_search(pat, txt, factor=2):
# boyer_moore(condense("01100010"), condense(a))
# boyer_moore(condense("01100011"), condense(a, offset=1))
-chunky_search("1100011", "1111001100101011001110110001101101101011110101101101010101111111111011100110110000001110100100011110")
+F = 3
+chunky_search("100100100100100", "1111001100101011001110110001101101101011110101101101010101111111111011100110110000001110100100011110", factor=F)
# boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011")
# print(condense("1110000110"))