aboutsummaryrefslogtreecommitdiff
path: root/ass2/q3/lcp.py
blob: 7e90ae063eeb0e8c75c9af25882f9070380ed3af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from random import randrange

from ass2.ukkonen import ukkonen, Point, char_is_after, Node, skip_count
from itertools import product


# s1 = "abdabc"
# s2 = "daba"
# s1 = "abaabaaba"
# s2 = "baabaaabaa"

# def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple):
#     print(s1[suffixes[0]:])
#     print(s2[suffixes[1]:])
#     matches = 0
#     if suffixes[0] == 0:
#         point = Point(root, s1[0], 1)
#     else:
#         point = skip_count(suffixes[0], Point(root), 0)
#     for i in range(suffixes[1], len(s2) + 1):
#         print(s2[i], end=" ")
#         if char_is_after(point, s2[i]):
#             if point.is_explicit():
#                 point = Point(point.node, s2[i], 1)
#             else:
#                 point = skip_count(1, point, i)
#             matches += 1
#             print("found")
#         else:
#             print("not found")
#             break
#     return matches

def naive(s1, s2, offset):
    count = 0
    for t in list(map(lambda x: x[0] == x[1], zip(s1[offset[0]:], s2[offset[1]:]))):
        if t:
            count += 1
        else:
            return count
    return count


def lcp(root: Node, s1: str, s2: str, offset: tuple):
    index = 0
    head = Point(root)
    shortest = min(len(s1) - offset[0], len(s2) - offset[1])
    while index < shortest:
        if s1[index + offset[0]] == s2[index + offset[1]]:
            going_to = head.node.get_child(s1[index + offset[0]])
            head.set_node(going_to)
            index += going_to.edge_length
        else:
            return index
    return index


def test_everything_for(tree, s1, s2):
    # print(s1, s2)
    for i in range(len(s1)):
        for j in range(len(s2)):
            result = lcp(tree, s1, s2, (i, j))
            a = naive(s1, s2, (i, j))
            assert result == a


def new_string(max_letters, max_len):
    return "".join([chr(randrange(ord("a"), ord("a") + max_letters)) for _ in range(0, max_len)])


def test(max_letters, max_len):
    alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters)))
    counter = 0
    while True:
        s1, s2 = new_string(max_letters, max_len), new_string(max_letters, max_len)
        tree = ukkonen(s1 + "z" + s2)
        test_everything_for(tree, s1, s2)
        counter += 1
        if counter % 1000 == 0:
            print(counter)


def main():
    s1 = "abcdacbdab"
    s2 = "dacbdabc"
    print(s1 + "&" + s2)
    tree = ukkonen(s1 + "&" + s2)
    tree.print_tree()

    # tup = (4, 5)
    # result = longest_prefix(tree, s1, s2, (0, 5))
    # result = lcp(tree, s1, s2, tup)
    # print(naive(s1, s2, tup))
    # print(result)


if __name__ == "__main__":
    test(4, 50)
    # main()