aboutsummaryrefslogtreecommitdiff
path: root/ass2/q3/lcp.py
diff options
context:
space:
mode:
authorakiyamn2021-05-01 21:05:41 +1000
committerakiyamn2021-05-01 21:05:41 +1000
commit3c60256c2bb81613e7ebc6919fd06d37f3bf318c (patch)
treea61d3856d5e8e43ff9c5098e2a9b02e7ea16de0a /ass2/q3/lcp.py
parent8159731e3a51027c44486caaa4458d9214753d43 (diff)
downloadfit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.tar.gz
fit3155-3c60256c2bb81613e7ebc6919fd06d37f3bf318c.zip
Ass 2: Added letter to Ukkonens and Q3 working, Q4 almost done
Diffstat (limited to 'ass2/q3/lcp.py')
-rw-r--r--ass2/q3/lcp.py108
1 files changed, 82 insertions, 26 deletions
diff --git a/ass2/q3/lcp.py b/ass2/q3/lcp.py
index 96bd3ff..7e90ae0 100644
--- a/ass2/q3/lcp.py
+++ b/ass2/q3/lcp.py
@@ -1,4 +1,7 @@
-from ass2.ukkonen import ukkonen, skip_count, Point, char_is_after, Node
+from random import randrange
+
+from ass2.ukkonen import ukkonen, Point, char_is_after, Node, skip_count
+from itertools import product
# s1 = "abdabc"
@@ -6,38 +9,91 @@ from ass2.ukkonen import ukkonen, skip_count, Point, char_is_after, Node
# s1 = "abaabaaba"
# s2 = "baabaaabaa"
-def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple):
- print(s1[suffixes[0]:])
- print(s2[suffixes[1]:])
- matches = 0
- if suffixes[0] == 0:
- point = Point(root, s1[0], 1)
- else:
- point = skip_count(suffixes[0], Point(root), 0)
- for i in range(suffixes[1], len(s2) + 1):
- print(s2[i], end=" ")
- if char_is_after(point, s2[i]):
- if point.is_explicit():
- point = Point(point.node, s2[i], 1)
- else:
- point = skip_count(1, point, i)
- matches += 1
- print("found")
+# def longest_prefix(root: Node, s1: str, s2: str, suffixes: tuple):
+# print(s1[suffixes[0]:])
+# print(s2[suffixes[1]:])
+# matches = 0
+# if suffixes[0] == 0:
+# point = Point(root, s1[0], 1)
+# else:
+# point = skip_count(suffixes[0], Point(root), 0)
+# for i in range(suffixes[1], len(s2) + 1):
+# print(s2[i], end=" ")
+# if char_is_after(point, s2[i]):
+# if point.is_explicit():
+# point = Point(point.node, s2[i], 1)
+# else:
+# point = skip_count(1, point, i)
+# matches += 1
+# print("found")
+# else:
+# print("not found")
+# break
+# return matches
+
+def naive(s1, s2, offset):
+ count = 0
+ for t in list(map(lambda x: x[0] == x[1], zip(s1[offset[0]:], s2[offset[1]:]))):
+ if t:
+ count += 1
+ else:
+ return count
+ return count
+
+
+def lcp(root: Node, s1: str, s2: str, offset: tuple):
+ index = 0
+ head = Point(root)
+ shortest = min(len(s1) - offset[0], len(s2) - offset[1])
+ while index < shortest:
+ if s1[index + offset[0]] == s2[index + offset[1]]:
+ going_to = head.node.get_child(s1[index + offset[0]])
+ head.set_node(going_to)
+ index += going_to.edge_length
else:
- print("not found")
- break
- return matches
+ return index
+ return index
+
+
+def test_everything_for(tree, s1, s2):
+ # print(s1, s2)
+ for i in range(len(s1)):
+ for j in range(len(s2)):
+ result = lcp(tree, s1, s2, (i, j))
+ a = naive(s1, s2, (i, j))
+ assert result == a
+
+
+def new_string(max_letters, max_len):
+ return "".join([chr(randrange(ord("a"), ord("a") + max_letters)) for _ in range(0, max_len)])
+
+
+def test(max_letters, max_len):
+ alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters)))
+ counter = 0
+ while True:
+ s1, s2 = new_string(max_letters, max_len), new_string(max_letters, max_len)
+ tree = ukkonen(s1 + "z" + s2)
+ test_everything_for(tree, s1, s2)
+ counter += 1
+ if counter % 1000 == 0:
+ print(counter)
def main():
s1 = "abcdacbdab"
s2 = "dacbdabc"
- print(s1)
- tree = ukkonen(s1)
+ print(s1 + "&" + s2)
+ tree = ukkonen(s1 + "&" + s2)
tree.print_tree()
- result = longest_prefix(tree, s1, s2, (0, 5))
- print(result)
+
+ # tup = (4, 5)
+ # result = longest_prefix(tree, s1, s2, (0, 5))
+ # result = lcp(tree, s1, s2, tup)
+ # print(naive(s1, s2, tup))
+ # print(result)
if __name__ == "__main__":
- main()
+ test(4, 50)
+ # main()