diff options
| author | akiyamn | 2021-03-25 20:29:06 +1100 |
|---|---|---|
| committer | akiyamn | 2021-03-25 20:29:06 +1100 |
| commit | 61e3ed968cc6fccc7dfa749f6a86ef3175917252 (patch) | |
| tree | c7eac145088379d7c01c7ea884a259deb874d1aa /ass1/binary_boyermoore.py | |
| parent | d8ea8904f9e29dbefc044ac9ce5a16686cd0c78c (diff) | |
| download | fit3155-61e3ed968cc6fccc7dfa749f6a86ef3175917252.tar.gz fit3155-61e3ed968cc6fccc7dfa749f6a86ef3175917252.zip | |
BM optimisations to generalisations and file reading
Diffstat (limited to 'ass1/binary_boyermoore.py')
| -rw-r--r-- | ass1/binary_boyermoore.py | 52 |
1 file changed, 36 insertions, 16 deletions
diff --git a/ass1/binary_boyermoore.py b/ass1/binary_boyermoore.py index 7888017..e826016 100644 --- a/ass1/binary_boyermoore.py +++ b/ass1/binary_boyermoore.py @@ -5,6 +5,7 @@ # R = [[m for __ in range(m)] for _ in range(0, 26)] # good_suffix = [0 for _ in range(0, m+1)] +import sys def alpha_number(char): if char == "0" or char == "1": @@ -27,7 +28,7 @@ def condense(binary, offset=0, size=2): for i in range(offset, len(binary)-offset, size): slice = binary[i:i+size] if len(slice) == size: - out += chr(97 + int(slice, size)) + out += chr(97 + int(slice, 2)) return out @@ -110,18 +111,18 @@ def boyer_moore(pat, txt): galil_index = -1 comps = 0 - print("="*15) - print(6*" " + txt) + # print("="*15) + # print(6*" " + txt) i = m-1 while j <= n-m: - print(f"{j=:02} {' ' * j}", end="") - for x in range(len(pat)): - if x == i: - print(pat[x].upper(), end="") - else: - print(pat[x], end="") - print() + # print(f"{j=:02} {' ' * j}", end="") + # for x in range(len(pat)): + # if x == i: + # print(pat[x].upper(), end="") + # else: + # print(pat[x], end="") + # print() match = pat[i] == txt[j+i] comps += 1 @@ -155,13 +156,17 @@ def boyer_moore(pat, txt): print(f"{comps} comparisons.") return comps, occurrence +def two_to_the(n): + return 1 << n + def chunky_search(pat, txt, factor=2): occurrence = 0 comps = 0 - for offset in range(1 << factor-1): - padding = format(offset, f"0{factor-1}b") + for offset in range(two_to_the(factor-1)): + padding = format(offset, f"0{factor-1}b") if len(pat) % factor else "" augmented_pat = f"{pat}{padding}" - print(padding) + # print(padding) + print(condense(format(two_to_the(factor)-1, f"0b"), 0, factor)) c, o = boyer_moore(condense(augmented_pat, 0, factor), condense(txt, offset, factor)) comps += c occurrence += o @@ -170,14 +175,29 @@ def chunky_search(pat, txt, factor=2): print(f"Chunky Optimisation: {occurrence} occurences in {comps} comparisons.") print(f"Normal: {base_occur} occurences in {base_comps} comparisons.") # assert base_occur 
== occurrence - print(f"{comps*100/base_comps:.2f}% of normal Boyer-Moore") + if base_comps > 0: + print(f"{comps * 100 / base_comps:.3f}% of normal Boyer-Moore") + print(f"{comps * 100 / 642096:.3f}% of their Boyer-Moore") + +def read_args(): + with open(sys.argv[1], "r") as txt_file: + txt = txt_file.read() + with open(sys.argv[2], "r") as pat_file: + pat = pat_file.read() + return txt, pat # boyer_moore(condense("01100010"), condense(a)) # boyer_moore(condense("01100011"), condense(a, offset=1)) -F = 3 -chunky_search("100100100100100", "1111001100101011001110110001101101101011110101101101010101111111111011100110110000001110100100011110", factor=F) +def main(): + F = 3 + if len(sys.argv) < 3: + print("Not enough arguments!") + else: + txt, pat = read_args() + chunky_search(pat, txt, factor=F) +main() # boyer_moore("111000110", "01110111010101110100101011101101111011111111111111100011001110111010101110100101011101101101110111010110111010010101110110110111011111011011") # print(condense("1110000110")) # print(condense("1110000110", offset=1)) |
