1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
from random import randrange
from ass2.ukkonen import ukkonen, Point, char_is_after, Node, skip_count
from itertools import product
# s1 = "abdabc"
# s2 = "daba"
# s1 = "abaabaaba"
# s2 = "baabaaabaa"
# def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple):
# print(s1[suffixes[0]:])
# print(s2[suffixes[1]:])
# matches = 0
# if suffixes[0] == 0:
# point = Point(root, s1[0], 1)
# else:
# point = skip_count(suffixes[0], Point(root), 0)
# for i in range(suffixes[1], len(s2) + 1):
# print(s2[i], end=" ")
# if char_is_after(point, s2[i]):
# if point.is_explicit():
# point = Point(point.node, s2[i], 1)
# else:
# point = skip_count(1, point, i)
# matches += 1
# print("found")
# else:
# print("not found")
# break
# return matches
def naive(s1, s2, offset):
count = 0
for t in list(map(lambda x: x[0] == x[1], zip(s1[offset[0]:], s2[offset[1]:]))):
if t:
count += 1
else:
return count
return count
def lcp(root: Node, s1: str, s2: str, offset: tuple):
index = 0
head = Point(root)
shortest = min(len(s1) - offset[0], len(s2) - offset[1])
while index < shortest:
if s1[index + offset[0]] == s2[index + offset[1]]:
going_to = head.node.get_child(s1[index + offset[0]])
head.set_node(going_to)
index += going_to.edge_length
else:
return index
return index
def test_everything_for(tree, s1, s2):
# print(s1, s2)
for i in range(len(s1)):
for j in range(len(s2)):
result = lcp(tree, s1, s2, (i, j))
a = naive(s1, s2, (i, j))
assert result == a
def new_string(max_letters, max_len):
return "".join([chr(randrange(ord("a"), ord("a") + max_letters)) for _ in range(0, max_len)])
def test(max_letters, max_len):
alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters)))
counter = 0
while True:
s1, s2 = new_string(max_letters, max_len), new_string(max_letters, max_len)
tree = ukkonen(s1 + "z" + s2)
test_everything_for(tree, s1, s2)
counter += 1
if counter % 1000 == 0:
print(counter)
def main():
s1 = "abcdacbdab"
s2 = "dacbdabc"
print(s1 + "&" + s2)
tree = ukkonen(s1 + "&" + s2)
tree.print_tree()
# tup = (4, 5)
# result = longest_prefix(tree, s1, s2, (0, 5))
# result = lcp(tree, s1, s2, tup)
# print(naive(s1, s2, tup))
# print(result)
if __name__ == "__main__":
test(4, 50)
# main()
|