Ass 2: Added letter to Ukkonens and Q3 working, Q4 almost done

author: akiyamn 2021-05-01 21:05:41 +1000
committer: akiyamn 2021-05-01 21:05:41 +1000
commit: 3c60256c2bb81613e7ebc6919fd06d37f3bf318c (patch)
tree: a61d3856d5e8e43ff9c5098e2a9b02e7ea16de0a /ass2/q3/lcp.py
parent: 8159731e3a51027c44486caaa4458d9214753d43 (diff)
download: fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.tar.gz
fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.zip
1 files changed, 82 insertions, 26 deletions
diff --git a/ass2/q3/lcp.py b/ass2/q3/lcp.py
index 96bd3ff..7e90ae0 100644
--- a/ass2/q3/lcp.py
+++ b/ass2/q3/lcp.py
@@ -1,4 +1,7 @@
-from ass2.ukkonen import ukkonen, skip_count, Point, char_is_after, Node
+from random import randrange
+
+from ass2.ukkonen import ukkonen, Point, char_is_after, Node, skip_count
+from itertools import product
 
 
 # s1 = "abdabc"
@@ -6,38 +9,91 @@ from ass2.ukkonen import ukkonen, skip_count, Point, char_is_after, Node
 # s1 = "abaabaaba"
 # s2 = "baabaaabaa"
 
-def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple):
-    print(s1[suffixes[0]:])
-    print(s2[suffixes[1]:])
-    matches = 0
-    if suffixes[0] == 0:
-        point = Point(root, s1[0], 1)
-    else:
-        point = skip_count(suffixes[0], Point(root), 0)
-    for i in range(suffixes[1], len(s2) + 1):
-        print(s2[i], end=" ")
-        if char_is_after(point, s2[i]):
-            if point.is_explicit():
-                point = Point(point.node, s2[i], 1)
-            else:
-                point = skip_count(1, point, i)
-            matches += 1
-            print("found")
+# def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple):
+#     print(s1[suffixes[0]:])
+#     print(s2[suffixes[1]:])
+#     matches = 0
+#     if suffixes[0] == 0:
+#         point = Point(root, s1[0], 1)
+#     else:
+#         point = skip_count(suffixes[0], Point(root), 0)
+#     for i in range(suffixes[1], len(s2) + 1):
+#         print(s2[i], end=" ")
+#         if char_is_after(point, s2[i]):
+#             if point.is_explicit():
+#                 point = Point(point.node, s2[i], 1)
+#             else:
+#                 point = skip_count(1, point, i)
+#             matches += 1
+#             print("found")
+#         else:
+#             print("not found")
+#             break
+#     return matches
+
+def naive(s1, s2, offset):
+    count = 0
+    for t in list(map(lambda x: x[0] == x[1], zip(s1[offset[0]:], s2[offset[1]:]))):
+        if t:
+            count += 1
+        else:
+            return count
+    return count
+
+
+def lcp(root: Node, s1: str, s2: str, offset: tuple):
+    index = 0
+    head = Point(root)
+    shortest = min(len(s1) - offset[0], len(s2) - offset[1])
+    while index < shortest:
+        if s1[index + offset[0]] == s2[index + offset[1]]:
+            going_to = head.node.get_child(s1[index + offset[0]])
+            head.set_node(going_to)
+            index += going_to.edge_length
         else:
-            print("not found")
-            break
-    return matches
+            return index
+    return index
+
+
+def test_everything_for(tree, s1, s2):
+    # print(s1, s2)
+    for i in range(len(s1)):
+        for j in range(len(s2)):
+            result = lcp(tree, s1, s2, (i, j))
+            a = naive(s1, s2, (i, j))
+            assert result == a
+
+
+def new_string(max_letters, max_len):
+    return "".join([chr(randrange(ord("a"), ord("a") + max_letters)) for _ in range(0, max_len)])
+
+
+def test(max_letters, max_len):
+    alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters)))
+    counter = 0
+    while True:
+        s1, s2 = new_string(max_letters, max_len), new_string(max_letters, max_len)
+        tree = ukkonen(s1 + "z" + s2)
+        test_everything_for(tree, s1, s2)
+        counter += 1
+        if counter % 1000 == 0:
+            print(counter)
 
 
 def main():
     s1 = "abcdacbdab"
     s2 = "dacbdabc"
-    print(s1)
-    tree = ukkonen(s1)
+    print(s1 + "&" + s2)
+    tree = ukkonen(s1 + "&" + s2)
     tree.print_tree()
-    result = longest_prefix(tree, s1, s2, (0, 5))
-    print(result)
+
+    # tup = (4, 5)
+    # result = longest_prefix(tree, s1, s2, (0, 5))
+    # result = lcp(tree, s1, s2, tup)
+    # print(naive(s1, s2, tup))
+    # print(result)
 
 
 if __name__ == "__main__":
-    main()
+    test(4, 50)
+    # main()
author	akiyamn	2021-05-01 21:05:41 +1000
committer	akiyamn	2021-05-01 21:05:41 +1000
commit	3c60256c2bb81613e7ebc6919fd06d37f3bf318c (patch)
tree	a61d3856d5e8e43ff9c5098e2a9b02e7ea16de0a /ass2/q3/lcp.py
parent	8159731e3a51027c44486caaa4458d9214753d43 (diff)
download	fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.tar.gz fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.zip