aboutsummaryrefslogtreecommitdiff
path: root/ass1/binary_boyermoore.py
blob: 0c48b786a04447bf6f0a8a4b39670e1c61834bac (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
txt = "zzzzzzzzzbaabzzzzzzzzzzzzzzzbaab"
pat = "baab"
m = len(pat)
n = len(txt)
R = [[m for __ in range(m)] for _ in range(0, 26)]
good_suffix = [0 for _ in range(0, m+1)]

def alpha_number(char):
    return ord(char) - 97


def reverse(string):
    return string[::-1]


def compare(string, i, end):
    for j in range(end):
        if i+j == end or string[i+j] != string[j]:
            return j


def gusfield(string):
    z = [0 for _ in string]
    z[0] = len(string)
    r = 0
    l = 0
    for i in range(1, len(string)):
        if i == 1:  # base case
            z[1] = compare(string, i, len(string))
            if z[1] > 0:
                r = z[1] + 1
                l = 1
        elif i > r: # Case 1
            z[i] = compare(string, i, len(string))
            if z[i] > 0:
                q = i + z[i]
                r = q - 1
                l = i
        elif i <= r:  # Case 2
            if z[i-l] < r-i:  # Case 2a
                z[i] = z[i-l]
            else:  # Case 2b
                q = compare(string, i, len(string))
                z[i] = q
                r = q
                l = i
    return z

for j in range(m):
    for i in range(j):
        print(pat[i], i)
        R[alpha_number(pat[i])][j] = i


Z = reverse(gusfield(reverse(pat)))+[0]
print(list(pat))
print(R)
print(Z)

for i in range(m):
    j = m - Z[i]
    good_suffix[j] = i+1

print("g", good_suffix)

matched_prefix = gusfield(pat)+[0]
for i in range(m-1, -1, -1):
    matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1])

print(matched_prefix)

print("="*15)

print(5*" " + txt)

j = 0
occurence = 0
while j <= n-m:
    matches = 0
    for i in range(m-1, -1, -1):
        print(f"{j=}  {' ' * j}", end="")
        for x in range(len(pat)):
            if x == i:
                print(pat[x].upper(), end="")
            else:
                print(pat[x], end="")
        print()
        match = pat[i] == txt[j+i]
        if match:
            matches += 1
            if matches == m:
                # print(f"    Match at {j}", end="")
                good_suffix_shift = m - matched_prefix[1]
                j += good_suffix_shift
                i = m-1
                occurence += 1
                break
            # print(i)
        else:
            mismatched = txt[j+i]
            bad_char_shift = R[alpha_number(mismatched)][i]-1 # Used to be i-R
            good_suffix_shift = 1
            if good_suffix[i+1] > 0:
                good_suffix_shift = m - good_suffix[i+1]
            elif good_suffix[i+1] == 0:
                good_suffix_shift = m - matched_prefix[i+1]
            elif matches == m:
                print(f"Match at {j}")
                good_suffix_shift = m - matched_prefix[1]
            j += max(good_suffix_shift, bad_char_shift)
            break
            # if j > n-m:
            #     break
print(f"It found {occurence} occurences.")

print(f"\n  {list(range(m))}")
for i, a in enumerate(R):
    print(chr(i+97), a)