diff options
| author | akiyamn | 2021-05-01 21:05:41 +1000 |
|---|---|---|
| committer | akiyamn | 2021-05-01 21:05:41 +1000 |
| commit | 3c60256c2bb81613e7ebc6919fd06d37f3bf318c (patch) | |
| tree | a61d3856d5e8e43ff9c5098e2a9b02e7ea16de0a /ass2/q3/lcp.py | |
| parent | 8159731e3a51027c44486caaa4458d9214753d43 (diff) | |
| download | fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.tar.gz fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.zip | |
Ass 2: Added letter to Ukkonens and Q3 working, Q4 almost done
Diffstat (limited to 'ass2/q3/lcp.py')
| -rw-r--r-- | ass2/q3/lcp.py | 108 |
1 files changed, 82 insertions, 26 deletions
diff --git a/ass2/q3/lcp.py b/ass2/q3/lcp.py index 96bd3ff..7e90ae0 100644 --- a/ass2/q3/lcp.py +++ b/ass2/q3/lcp.py @@ -1,4 +1,7 @@ -from ass2.ukkonen import ukkonen, skip_count, Point, char_is_after, Node +from random import randrange + +from ass2.ukkonen import ukkonen, Point, char_is_after, Node, skip_count +from itertools import product # s1 = "abdabc" @@ -6,38 +9,91 @@ from ass2.ukkonen import ukkonen, skip_count, Point, char_is_after, Node # s1 = "abaabaaba" # s2 = "baabaaabaa" -def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple): - print(s1[suffixes[0]:]) - print(s2[suffixes[1]:]) - matches = 0 - if suffixes[0] == 0: - point = Point(root, s1[0], 1) - else: - point = skip_count(suffixes[0], Point(root), 0) - for i in range(suffixes[1], len(s2) + 1): - print(s2[i], end=" ") - if char_is_after(point, s2[i]): - if point.is_explicit(): - point = Point(point.node, s2[i], 1) - else: - point = skip_count(1, point, i) - matches += 1 - print("found") +# def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple): +# print(s1[suffixes[0]:]) +# print(s2[suffixes[1]:]) +# matches = 0 +# if suffixes[0] == 0: +# point = Point(root, s1[0], 1) +# else: +# point = skip_count(suffixes[0], Point(root), 0) +# for i in range(suffixes[1], len(s2) + 1): +# print(s2[i], end=" ") +# if char_is_after(point, s2[i]): +# if point.is_explicit(): +# point = Point(point.node, s2[i], 1) +# else: +# point = skip_count(1, point, i) +# matches += 1 +# print("found") +# else: +# print("not found") +# break +# return matches + +def naive(s1, s2, offset): + count = 0 + for t in list(map(lambda x: x[0] == x[1], zip(s1[offset[0]:], s2[offset[1]:]))): + if t: + count += 1 + else: + return count + return count + + +def lcp(root: Node, s1: str, s2: str, offset: tuple): + index = 0 + head = Point(root) + shortest = min(len(s1) - offset[0], len(s2) - offset[1]) + while index < shortest: + if s1[index + offset[0]] == s2[index + offset[1]]: + going_to = head.node.get_child(s1[index + offset[0]]) + head.set_node(going_to) + index += going_to.edge_length else: - print("not found") - break - return matches + return index + return index + + +def test_everything_for(tree, s1, s2): + # print(s1, s2) + for i in range(len(s1)): + for j in range(len(s2)): + result = lcp(tree, s1, s2, (i, j)) + a = naive(s1, s2, (i, j)) + assert result == a + + +def new_string(max_letters, max_len): + return "".join([chr(randrange(ord("a"), ord("a") + max_letters)) for _ in range(0, max_len)]) + + +def test(max_letters, max_len): + alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters))) + counter = 0 + while True: + s1, s2 = new_string(max_letters, max_len), new_string(max_letters, max_len) + tree = ukkonen(s1 + "z" + s2) + test_everything_for(tree, s1, s2) + counter += 1 + if counter % 1000 == 0: + print(counter) def main(): s1 = "abcdacbdab" s2 = "dacbdabc" - print(s1) - tree = ukkonen(s1) + print(s1 + "&" + s2) + tree = ukkonen(s1 + "&" + s2) tree.print_tree() - result = longest_prefix(tree, s1, s2, (0, 5)) - print(result) + + # tup = (4, 5) + # result = longest_prefix(tree, s1, s2, (0, 5)) + # result = lcp(tree, s1, s2, tup) + # print(naive(s1, s2, tup)) + # print(result) if __name__ == "__main__": - main() + test(4, 50) + # main() |
