aboutsummaryrefslogtreecommitdiff
path: root/ass2
diff options
context:
space:
mode:
authorakiyamn2021-04-25 19:42:09 +1000
committerakiyamn2021-04-25 19:42:09 +1000
commite347fd3246a4282d7fea85c1ae727e48c810480b (patch)
tree4424fc641e6b49e5cef007f778fb617c15684e70 /ass2
parentf1e1e2a64e30e93ce0d314f62855c16cb90e34c1 (diff)
downloadfit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.tar.gz
fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.zip
Ass 2: Maybe possibly Ukkonen's could be working probably
Diffstat (limited to 'ass2')
-rw-r--r--ass2/q2/gen_cases.py104
-rw-r--r--ass2/q2/new_suffix_array.py47
2 files changed, 113 insertions, 38 deletions
diff --git a/ass2/q2/gen_cases.py b/ass2/q2/gen_cases.py
index f61222b..82d165e 100644
--- a/ass2/q2/gen_cases.py
+++ b/ass2/q2/gen_cases.py
@@ -1,15 +1,99 @@
-from itertools import product
+import math
+from itertools import product, accumulate, count, repeat
+from functools import reduce
from new_suffix_array import ukkonen
+import timeit
+import random
-MAX_LEN = 5
-NUM_LETTERS = 6
+def test(func, max_len, max_letters, num_iterations=math.inf):
+ alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters)))
+ combos = map(lambda x: "".join(x), product(alphabet, repeat=max_len-1))
+ total_time = 0
+ longest = "", -1
+ i = 0
+ for i, string in enumerate(combos):
+ print(string, end=": ")
+ start = timeit.default_timer()
+ func(string)
+ stop = timeit.default_timer() - start
+ print(f"{(stop * 10 ** 3):.6f}ms")
+ if stop > longest[1]:
+ longest = string, stop
+ total_time += stop
+ if i >= num_iterations:
+ break
+ print("**** TEST ENDED ****")
+ print(f"Total time taken: {(total_time * 10):.6f}s = "
+ f"{(total_time * 10 ** 6):.3f}μs")
+ print(f"Longest string was '{longest[0]}' taking {(longest[1] * 10 ** 6):.3f}μs")
+ if i > 0:
+ print(f"Average time taken: {(total_time / i * 10):.6f}s = "
+ f"{(total_time / i * 10 ** 6):.3f}μs")
+ return total_time / i
-alphabet = "".join(map(chr, range(ord("a"), ord("a") + NUM_LETTERS)))
-combos = map(lambda x: "".join(x)+"$", product(alphabet, repeat=MAX_LEN))
-for i in range(100):
- try:
- ukkonen(next(combos))
- except StopIteration:
- print("**** TEST ENDED ****") \ No newline at end of file
+# test(ukkonen, 50, 6)
+
+# if __name__ == "__main__":
+# for length in range(2, 10):
+# print(f"==== {length=} ====")
+# before = test(ukkonen, length, 6)
+# after = test(ukkonen, length*2, 6)
+# print(f"From {length} -> {length*2}: Ratio = {after/before}")
+
+
+def random_string(length, variance=6):
+ return "".join(map(chr, [random.randrange(ord("A"), ord("A") + variance) for _ in range(length)]))
+
+
+def do_and_get_averages(func, length, num_times=100):
+ times = []
+ # the_range = range(ord("A"), ord("A")+26+26+6)
+ # letters = map(chr, the_range)
+ for string in [random_string(length) for _ in range(num_times)]:
+ average = 0
+ for i in range(5):
+ # print(string, end=": ")
+ start = timeit.default_timer()
+ func(string)
+ stop = timeit.default_timer() - start
+ average += stop
+ # print(f"{(stop * 10 ** 3):.6f}ms")
+ average /= 5
+ times.append(average)
+ return times
+
+
+# for i in range(50, 100):
+# first = sum(do_and_get_averages(ukkonen, i, 100))/100
+# second = sum(do_and_get_averages(ukkonen, i*2, 100))/100
+# print(i, first, second, second/first)
+
+def random_test(func, length, interval=10000):
+ i = 0
+ while True:
+ func(random_string(length, 6))
+ i += 1
+ if i % interval == 0:
+ print(i)
+
+random_test(ukkonen, 100)
+
+# times = []
+# # the_range = range(ord("A"), ord("A")+26+26+6)
+# # letters = map(chr, the_range)
+# for string in [random_string(20) for _ in range(100)]:
+# average = 0
+# for i in range(20):
+# # print(string, end=": ")
+# start = timeit.default_timer()
+# ukkonen(string)
+# stop = timeit.default_timer() - start
+# average += stop
+# # print(f"{(stop * 10 ** 3):.6f}ms")
+# average /= 5
+# times.append(average)
+# print(times)
+# for i in range(1, len(the_range)//2):
+# print(f"{i}:\t{times[i-1]}\t{times[(i-1)*2]}\t{times[(i-1)*2]/times[i-1]}")
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py
index 3ec7af4..841ff67 100644
--- a/ass2/q2/new_suffix_array.py
+++ b/ass2/q2/new_suffix_array.py
@@ -141,14 +141,14 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):
curr_char = Node.string[i]
match = char_is_after(active, curr_char)
if match:
- print(3)
+ # print(3)
remainder += 1
if node_just_created is not None:
node_just_created.link = active.node
active = skip_count(1, active, i)
did_rule_three = True
else:
- print(2)
+ # print(2)
if not active.is_explicit():
mediator = split_edge(active)
mediator.add_child(Node(i, "#"))
@@ -156,6 +156,8 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):
node_just_created.link = mediator
node_just_created = mediator
active.length -= 1
+ if active.length == 0:
+ active.set_node(active.node)
else:
active.node.add_child(Node(i, "#"))
if node_just_created is not None and node_just_created.link is None:
@@ -163,10 +165,11 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):
remainder = pos(remainder - 1)
active.set_node(active.node.link)
if remainder > 0:
- active = skip_count(remainder, root_point, i - remainder)
+ active = skip_count(remainder, Point(root), i - remainder)
last_j = j
j += 1
- root.print_tree()
+ # print(active)
+ # root.print_tree()
return active, remainder, last_j
@@ -177,7 +180,8 @@ def char_is_after(point: Point, char):
if point.length == point.edge_node.edge_length:
return Node.string[point.edge_node.start] == char
else: # If not at the end of an edge
- return Node.string[point.index_here() + point.length] == char
+ # return Node.string[point.index_here() + point.length] == char
+ return Node.string[point.index_here() + 1] == char
def skip_count(num_chars, start_point: Point, index):
@@ -198,7 +202,7 @@ def skip_count(num_chars, start_point: Point, index):
# Node.string[i] if head.node.root else Node.string[head.node.end_index + 1]
# assert head.node.end_index + 1 + chars_left < len(Node.string)
- while chars_left > incoming_length:
+ while chars_left > 0:
# assert head.node.end_index + 1 + chars_left < len(Node.string)
direction = Node.string[index]
next_node = head.node.get_child(direction)
@@ -211,45 +215,32 @@ def skip_count(num_chars, start_point: Point, index):
index += incoming_length
head.set_node(next_node)
- direction = Node.string[index]
+ # direction = Node.string[index]
if chars_left > 0: # Landed on an edge
- head.edge = direction
+ head.edge = Node.string[index]
head.length = chars_left
return head
def ukkonen(string):
+ string += "$"
Node.string = string
- # string += "$"
+ Node.global_end = 0
+ Node.all_nodes.clear()
n = len(string)
remainder = 0
last_j = 1
root = create_root()
root.add_child(Node(0, "#"))
- link_pending = root
- # next = Node(4, 6)
- # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#"))
- # root.add_child(Node(1, 6))
active = Point(root)
- for i in range(1, len(string)):
+ for i in range(1, n):
active, remainder, last_j = do_phase(root, active, i, last_j, remainder)
- place = Point(root, "a", 1)
- # print(char_is_after(place, "b"))
-
- # Node.global_end = len(string)
- # root.print_tree()
- # place = Point(next, "", 0)
- # print(place, place.index_here(), place.char_here())
- # place = skip_count(3, place)
- # place = skip_count(4, place)
- # place = skip_count(3, place)
- # print(place, place.index_here(), place.char_here())
- # split_edge(place)
-
if __name__ == "__main__":
- ukkonen("aaaab$")
+ # ukkonen("DEFDBEFFDDEFFFADEFFB")
+ ukkonen("abacabad")
+ print("done")
# ukkonen("abcbcbc$")