1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
txt = "zzzzaaabcdabcdazzzz"
pat = "abcda"
# pat = "abcda"
m = len(pat)
n = len(txt)
R = [m for _ in range(0, 26)]
good_suffix = [0 for _ in range(0, m+1)]
def alpha_number(char):
return ord(char) - 97
def reverse(string):
return string[::-1]
def compare(string, i, end):
for j in range(end):
if i+j == end or string[i+j] != string[j]:
return j
def gusfield(string):
z = [0 for _ in string]
z[0] = len(string)
r = 0
l = 0
for i in range(1, len(string)):
if i == 1: # base case
z[1] = compare(string, i, len(string))
if z[1] > 0:
r = z[1] + 1
l = 1
elif i > r: # Case 1
z[i] = compare(string, i, len(string))
if z[i] > 0:
q = i + z[i]
r = q - 1
l = i
elif i <= r: # Case 2
if z[i-l] < r-i: # Case 2a
z[i] = z[i-l]
else: # Case 2b
q = compare(string, i, len(string))
z[i] = q
r = q
l = i
return z
for i in range(m):
print(pat[i], i)
R[alpha_number(pat[i])] = i
Z = reverse(gusfield(reverse(pat)))+[0]
print(list(pat))
print(R)
print(Z)
for i in range(m):
j = m - Z[i]
good_suffix[j] = i+1
print("g", good_suffix)
matched_prefix = gusfield(pat)+[0]
for i in range(m-1, -1, -1):
matched_prefix[i] = max(matched_prefix[i], matched_prefix[i+1])
print(matched_prefix)
print("="*15)
print(5*" " + txt)
j = 0
while j < n-m:
matches = 0
for i in range(m-1, -1, -1):
print(f"{j=} {' ' * j}", end="")
for x in range(len(pat)):
if x == i:
print(pat[x].upper(), end="")
else:
print(pat[x], end="")
print()
# print(j)
match = pat[i] == txt[j+i]
if match:
matches += 1
if matches == m:
# print(f" Match at {j}", end="")
good_suffix_shift = m - matched_prefix[1]
j += good_suffix_shift
i = m-1
break
# print(i)
else:
mismatched = txt[j+i]
bad_char_shift = i-R[alpha_number(mismatched)]
good_suffix_shift = 1
if good_suffix[i+1] > 0:
good_suffix_shift = m - good_suffix[i+1]
elif good_suffix[i+1] == 0:
good_suffix_shift = m - matched_prefix[i+1]
elif matches == m:
print(f"Match at {j}")
good_suffix_shift = m - matched_prefix[1]
j += max(good_suffix_shift, bad_char_shift)
break
if j > n-m:
break
|