# txt = "zzzzzzzzzabczzzzzzzzzz"
# pat = "abczzzabc"
# m = len(pat)
# n = len(txt)
# R = [[m for __ in range(m)] for _ in range(0, 26)]
# good_suffix = [0 for _ in range(0, m+1)]

import sys

def alpha_number(char):
    """Map a single symbol to an integer table index.

    The digit characters "0" and "1" map to 0 and 1. Any other character
    maps to its code point minus that of "a", so "a" -> 0, "b" -> 1, ...
    Characters below "a" therefore produce negative numbers.
    """
    if char in ("0", "1"):
        return int(char)
    return ord(char) - ord("a")


def reverse(string):
    """Return a reversed copy of a sliceable sequence (e.g. a str or list)."""
    backwards = slice(None, None, -1)
    return string[backwards]


def compare(string, i, end):
    """Count how many characters starting at ``i`` match the start of ``string``.

    Compares ``string[i + j]`` with ``string[j]`` for j = 0, 1, ... and stops
    at the first difference or when ``i + j`` reaches ``end``.

    Args:
        string: the sequence being compared against its own prefix.
        i: index at which the comparison starts.
        end: exclusive upper bound for ``i + j``.

    Returns:
        The number of matching characters. This is always an int, including
        for ``i == 0`` or ``end == 0`` where no mismatch is ever found.
    """
    matched = 0
    while i + matched < end and string[i + matched] == string[matched]:
        matched += 1
    return matched

def condense(binary, offset=0, size=2):
    """Pack a string of binary digits into letters, ``size`` digits per letter.

    Each full chunk of ``size`` characters is read as a base-2 number ``v``
    and emitted as ``chr(97 + v)`` (so "00" -> "a", "01" -> "b", ...).
    Chunks shorter than ``size`` are dropped.

    Args:
        binary: string of "0"/"1" characters.
        offset: index of the first chunk. Chunk start positions run from
            ``offset`` up to ``len(binary) - offset`` (exclusive).
        size: number of binary digits per output letter.

    Returns:
        The condensed string.

    Raises:
        ValueError: if a full chunk is not a valid base-2 literal.
    """
    letters = []
    for begin in range(offset, len(binary) - offset, size):
        chunk = binary[begin:begin + size]
        if len(chunk) == size:
            letters.append(chr(97 + int(chunk, 2)))
    # Join once instead of growing a string inside the loop.
    return "".join(letters)


def gusfield(string):
    """Compute the Z-array of ``string`` (Gusfield's Z-algorithm).

    ``z[i]`` is the length of the longest substring starting at ``i`` that
    is also a prefix of ``string``; ``z[0]`` is ``len(string)``.

    The scan keeps the z-box ``[left, right)`` that reaches furthest to the
    right. Inside the box the value is seeded from the mirrored position
    ``i - left``, and explicit comparisons only ever resume at ``right``,
    which keeps the total work linear in ``len(string)``.

    Args:
        string: the sequence to preprocess.

    Returns:
        A list of ints with one entry per character; ``[]`` for empty input.
    """
    n = len(string)
    if n == 0:
        return []
    z = [0] * n
    z[0] = n
    left = 0
    right = 0  # exclusive end of the current z-box
    for i in range(1, n):
        # Reuse what the enclosing z-box already proves, capped at its end.
        k = min(right - i, z[i - left]) if i < right else 0
        while i + k < n and string[k] == string[i + k]:
            k += 1
        z[i] = k
        if i + k > right:
            left = i
            right = i + k
    return z

def gen_jump_table(pat):
    """Build the bad-character lookup table for ``pat``.

    The table has 256 rows indexed by ``alpha_number(symbol)`` and one
    column per pattern position. ``table[c][j]`` is the largest index
    ``i <= j`` with ``alpha_number(pat[i]) == c``, or -1 if there is none.
    """
    length = len(pat)
    table = [[-1] * length for _ in range(256)]
    for pos, symbol in enumerate(pat):
        row = table[alpha_number(symbol)]
        # Positions are visited left to right, so a later occurrence
        # overwrites an earlier one from its own column onwards.
        for col in range(pos, length):
            row[col] = pos
    return table

def gen_z_suffix(pat):
    """Return the Z-values of the reversed pattern, flipped back, plus a 0.

    The result has ``len(pat) + 1`` entries; the last one is always 0.
    """
    z_of_reversed = gusfield(reverse(pat))
    z_suffix = reverse(z_of_reversed)
    return z_suffix + [0]

# print(list(pat))
# print(R)
# print(Z)

def gen_good_suffix(pat, Z):
    """Build the good-suffix table from the suffix Z-values ``Z``.

    For every pattern position (counted from 1) the entry at index
    ``len(pat) - Z[position - 1]`` is set to that position. Later positions
    overwrite earlier ones; untouched entries stay 0.

    Returns:
        A list of ``len(pat) + 1`` ints.
    """
    length = len(pat)
    table = [0] * (length + 1)
    for end in range(1, length + 1):
        table[length - Z[end - 1]] = end
    return table

# print("g", good_suffix)

def gen_matched_prefix(pat):
    """Build the matched-prefix table for ``pat``.

    ``matched_prefix[i]`` is the length of the longest suffix of ``pat[i:]``
    that is also a prefix of ``pat``. A prefix match starting at ``i`` only
    qualifies when it runs to the end of the pattern (``i + Z[i] == m``);
    otherwise the value is inherited from position ``i + 1``. Taking a plain
    maximum of the Z-values would overstate the overlap whenever a prefix
    match stops before the end of the pattern.

    Returns:
        A list of ``len(pat) + 1`` ints whose last entry is 0
        (``[0]`` for an empty pattern).
    """
    m = len(pat)
    if m == 0:
        return [0]
    z = gusfield(pat)
    matched_prefix = [0] * (m + 1)
    for i in range(m - 1, -1, -1):
        if i + z[i] == m:
            # The prefix match at i reaches the end: it is a true suffix.
            matched_prefix[i] = z[i]
        else:
            matched_prefix[i] = matched_prefix[i + 1]
    return matched_prefix


def preprocess(pat):
    """Compute the lookup tables used by ``boyer_moore`` for ``pat``.

    Returns:
        A tuple ``(jump_table, good_suffix, matched_prefix)``.
    """
    jump_table = gen_jump_table(pat)
    suffix_table = gen_good_suffix(pat, gen_z_suffix(pat))
    prefix_table = gen_matched_prefix(pat)
    return jump_table, suffix_table, prefix_table


def boyer_moore(pat, txt):
    """Search ``txt`` for ``pat`` using Boyer-Moore shifts.

    Each alignment is compared right to left. On a mismatch the pattern is
    shifted by the larger of the bad-character shift and the good-suffix
    (or matched-prefix) shift taken from the tables built by ``preprocess``.
    When the good-suffix/matched-prefix shift is the one applied, the slice
    ``pat[start:stop]`` is recorded as already lined up with matched text
    and is skipped on the next alignment (Galil's optimisation). That skip
    relies on the tables from ``preprocess`` being exact.

    Args:
        pat: the pattern; its symbols must be valid for ``alpha_number``.
        txt: the text to search.

    Returns:
        A tuple ``(comps, occurrences)``: the number of character
        comparisons performed and the list of 0-based start indices of the
        matches, in increasing order.

    Side effects:
        Prints the comparison count before returning.
    """
    m = len(pat)
    n = len(txt)
    occurrences = []
    comps = 0
    if m == 0 or m > n:
        # Nothing can match, and an empty pattern cannot be indexed.
        print(comps)
        return comps, occurrences

    R, good_suffix, matched_prefix = preprocess(pat)
    i = m - 1  # pattern index currently being compared
    j = 0      # text index aligned with pat[0]
    galil = False
    start = 0
    stop = 0
    while j <= n - m:
        comps += 1
        if pat[i] == txt[j + i]:
            if galil and start < i <= stop:
                # pat[start:stop] is known to match, so treat it as compared.
                # Landing on `start` (not start - 1) means the character to
                # its left is still checked on the next iteration.
                i = start
                galil = False
            if i == 0:
                # Record the match before moving to the next alignment.
                occurrences.append(j)
                j += m - matched_prefix[1]
                i = m - 1
                galil = False
            else:
                i -= 1
        else:
            mismatched = txt[j + i]
            bad_char_shift = i - R[alpha_number(mismatched)][i]
            if good_suffix[i + 1] > 0:
                good_suffix_shift = m - good_suffix[i + 1]
                start = good_suffix[i + 1] - m + i + 1
                stop = good_suffix[i + 1]
            else:
                good_suffix_shift = m - matched_prefix[i + 1]
                start = 0
                stop = matched_prefix[i + 1]
            best_shift = max(good_suffix_shift, bad_char_shift)
            j += best_shift
            # The known-matching region is only valid for the shift it was
            # derived from.
            galil = best_shift == good_suffix_shift
            i = m - 1

    print(comps)
    return comps, occurrences

def two_to_the(n):
    """Return 2 raised to the power ``n``, computed with a left bit shift."""
    power = 1
    power <<= n
    return power

def chunky_search(pat, txt, factor=2):
    """Run Boyer-Moore on condensed input and compare it with a plain run.

    For every offset in ``range(two_to_the(factor - 1))`` the pattern
    (padded with the binary form of the offset when its length is not a
    multiple of ``factor``) and the text (condensed starting at that
    offset) are packed with ``condense`` and searched with ``boyer_moore``.
    Comparison counts are summed and the occurrence lists concatenated
    exactly as returned by the condensed searches. A plain search of the
    uncondensed input is then run and a comparison report is printed.

    Returns:
        A tuple ``(comps, occurrences)`` for the condensed searches.
    """
    total_comps = 0
    found = []
    for offset in range(two_to_the(factor - 1)):
        if len(pat) % factor:
            padding = format(offset, f"0{factor-1}b")
        else:
            padding = ""
        augmented_pat = f"{pat}{padding}"
        condensed_pat = condense(augmented_pat, 0, factor)
        condensed_txt = condense(txt, offset, factor)
        run_comps, run_hits = boyer_moore(condensed_pat, condensed_txt)
        total_comps += run_comps
        print(offset, run_hits)
        found += run_hits

    plain_comps, plain_hits = boyer_moore(pat, txt)
    print(plain_hits[:20])
    print(found[:10])
    print("*" * 20)
    print(f"Chunky Optimisation: {len(found)} occurences in {total_comps} comparisons.")
    print(f"Normal: {len(plain_hits)} occurences in {plain_comps} comparisons.")
    if base_comps_positive := plain_comps > 0:
        print(f"{total_comps * 100 / plain_comps:.3f}% of normal Boyer-Moore")
        # 642096 is a hard-coded reference count; its origin is not shown
        # in this file.
        print(f"{total_comps * 100 / 642096:.3f}% of their Boyer-Moore")
    return total_comps, found

def read_args():
    """Read the text and pattern files named on the command line.

    ``sys.argv[1]`` is the text file and ``sys.argv[2]`` the pattern file.
    Each file is read in full, in that order.

    Returns:
        A tuple ``(txt, pat)`` with the complete contents of both files.
    """
    contents = []
    for arg_index in (1, 2):
        with open(sys.argv[arg_index], "r") as handle:
            contents.append(handle.read())
    txt, pat = contents
    return txt, pat

def output_matches(occurrences):
    """Write one occurrence per line to ``output_binary_boyermoore.txt``.

    The file is created in the current working directory and overwritten
    if it already exists.
    """
    with open("output_binary_boyermoore.txt", "w") as out_file:
        out_file.writelines(f"{position}\n" for position in occurrences)

def main():
    """Command-line entry point.

    Expects a text file path and a pattern file path as the first two
    arguments. Runs ``chunky_search`` with a factor of 2, prints the number
    of comparisons and writes the occurrences with ``output_matches``.
    Prints a message and does nothing else when arguments are missing.
    """
    factor = 2
    if len(sys.argv) < 3:
        print("Not enough arguments!")
        return
    txt, pat = read_args()
    comps, occurrences = chunky_search(pat, txt, factor)
    print(comps)
    output_matches(occurrences)


# Only run the search when executed as a script, so importing this module
# does not read files or write output.
if __name__ == "__main__":
    main()