diff options
| author | akiyamn | 2021-04-25 19:42:09 +1000 |
|---|---|---|
| committer | akiyamn | 2021-04-25 19:42:09 +1000 |
| commit | e347fd3246a4282d7fea85c1ae727e48c810480b (patch) | |
| tree | 4424fc641e6b49e5cef007f778fb617c15684e70 /ass2/q2 | |
| parent | f1e1e2a64e30e93ce0d314f62855c16cb90e34c1 (diff) | |
| download | fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.tar.gz fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.zip | |
Ass 2: Maybe possibly Ukkonen's could be working probably
Diffstat (limited to 'ass2/q2')
| -rw-r--r-- | ass2/q2/gen_cases.py | 104 | ||||
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 47 |
2 files changed, 113 insertions, 38 deletions
diff --git a/ass2/q2/gen_cases.py b/ass2/q2/gen_cases.py index f61222b..82d165e 100644 --- a/ass2/q2/gen_cases.py +++ b/ass2/q2/gen_cases.py @@ -1,15 +1,99 @@ -from itertools import product +import math +from itertools import product, accumulate, count, repeat +from functools import reduce from new_suffix_array import ukkonen +import timeit +import random -MAX_LEN = 5 -NUM_LETTERS = 6 +def test(func, max_len, max_letters, num_iterations=math.inf): + alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters))) + combos = map(lambda x: "".join(x), product(alphabet, repeat=max_len-1)) + total_time = 0 + longest = "", -1 + i = 0 + for i, string in enumerate(combos): + print(string, end=": ") + start = timeit.default_timer() + func(string) + stop = timeit.default_timer() - start + print(f"{(stop * 10 ** 3):.6f}ms") + if stop > longest[1]: + longest = string, stop + total_time += stop + if i >= num_iterations: + break + print("**** TEST ENDED ****") + print(f"Total time taken: {(total_time * 10):.6f}s = " + f"{(total_time * 10 ** 6):.3f}μs") + print(f"Longest string was '{longest[0]}' taking {(longest[1] * 10 ** 6):.3f}μs") + if i > 0: + print(f"Average time taken: {(total_time / i * 10):.6f}s = " + f"{(total_time / i * 10 ** 6):.3f}μs") + return total_time / i -alphabet = "".join(map(chr, range(ord("a"), ord("a") + NUM_LETTERS))) -combos = map(lambda x: "".join(x)+"$", product(alphabet, repeat=MAX_LEN)) -for i in range(100): - try: - ukkonen(next(combos)) - except StopIteration: - print("**** TEST ENDED ****")
\ No newline at end of file +# test(ukkonen, 50, 6) + +# if __name__ == "__main__": +# for length in range(2, 10): +# print(f"==== {length=} ====") +# before = test(ukkonen, length, 6) +# after = test(ukkonen, length*2, 6) +# print(f"From {length} -> {length*2}: Ratio = {after/before}") + + +def random_string(length, variance=6): + return "".join(map(chr, [random.randrange(ord("A"), ord("A") + variance) for _ in range(length)])) + + +def do_and_get_averages(func, length, num_times=100): + times = [] + # the_range = range(ord("A"), ord("A")+26+26+6) + # letters = map(chr, the_range) + for string in [random_string(length) for _ in range(num_times)]: + average = 0 + for i in range(5): + # print(string, end=": ") + start = timeit.default_timer() + func(string) + stop = timeit.default_timer() - start + average += stop + # print(f"{(stop * 10 ** 3):.6f}ms") + average /= 5 + times.append(average) + return times + + +# for i in range(50, 100): +# first = sum(do_and_get_averages(ukkonen, i, 100))/100 +# second = sum(do_and_get_averages(ukkonen, i*2, 100))/100 +# print(i, first, second, second/first) + +def random_test(func, length, interval=10000): + i = 0 + while True: + func(random_string(length, 6)) + i += 1 + if i % interval == 0: + print(i) + +random_test(ukkonen, 100) + +# times = [] +# # the_range = range(ord("A"), ord("A")+26+26+6) +# # letters = map(chr, the_range) +# for string in [random_string(20) for _ in range(100)]: +# average = 0 +# for i in range(20): +# # print(string, end=": ") +# start = timeit.default_timer() +# ukkonen(string) +# stop = timeit.default_timer() - start +# average += stop +# # print(f"{(stop * 10 ** 3):.6f}ms") +# average /= 5 +# times.append(average) +# print(times) +# for i in range(1, len(the_range)//2): +# print(f"{i}:\t{times[i-1]}\t{times[(i-1)*2]}\t{times[(i-1)*2]/times[i-1]}") diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 3ec7af4..841ff67 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -141,14 +141,14 @@ def do_phase(root: Node, active: Point, i, last_j, remainder): curr_char = Node.string[i] match = char_is_after(active, curr_char) if match: - print(3) + # print(3) remainder += 1 if node_just_created is not None: node_just_created.link = active.node active = skip_count(1, active, i) did_rule_three = True else: - print(2) + # print(2) if not active.is_explicit(): mediator = split_edge(active) mediator.add_child(Node(i, "#")) @@ -156,6 +156,8 @@ def do_phase(root: Node, active: Point, i, last_j, remainder): node_just_created.link = mediator node_just_created = mediator active.length -= 1 + if active.length == 0: + active.set_node(active.node) else: active.node.add_child(Node(i, "#")) if node_just_created is not None and node_just_created.link is None: @@ -163,10 +165,11 @@ def do_phase(root: Node, active: Point, i, last_j, remainder): remainder = pos(remainder - 1) active.set_node(active.node.link) if remainder > 0: - active = skip_count(remainder, root_point, i - remainder) + active = skip_count(remainder, Point(root), i - remainder) last_j = j j += 1 - root.print_tree() + # print(active) + # root.print_tree() return active, remainder, last_j @@ -177,7 +180,8 @@ def char_is_after(point: Point, char): if point.length == point.edge_node.edge_length: return Node.string[point.edge_node.start] == char else: # If not at the end of an edge - return Node.string[point.index_here() + point.length] == char + # return Node.string[point.index_here() + point.length] == char + return Node.string[point.index_here() + 1] == char def skip_count(num_chars, start_point: Point, index): @@ -198,7 +202,7 @@ def skip_count(num_chars, start_point: Point, index): # Node.string[i] if head.node.root else Node.string[head.node.end_index + 1] # assert head.node.end_index + 1 + chars_left < len(Node.string) - while chars_left > incoming_length: + while chars_left > 0: # assert head.node.end_index + 1 + chars_left < len(Node.string) direction = Node.string[index] next_node = head.node.get_child(direction) @@ -211,45 +215,32 @@ def skip_count(num_chars, start_point: Point, index): index += incoming_length head.set_node(next_node) - direction = Node.string[index] + # direction = Node.string[index] if chars_left > 0: # Landed on an edge - head.edge = direction + head.edge = Node.string[index] head.length = chars_left return head def ukkonen(string): + string += "$" Node.string = string - # string += "$" + Node.global_end = 0 + Node.all_nodes.clear() n = len(string) remainder = 0 last_j = 1 root = create_root() root.add_child(Node(0, "#")) - link_pending = root - # next = Node(4, 6) - # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) - # root.add_child(Node(1, 6)) active = Point(root) - for i in range(1, len(string)): + for i in range(1, n): active, remainder, last_j = do_phase(root, active, i, last_j, remainder) - place = Point(root, "a", 1) - # print(char_is_after(place, "b")) - - # Node.global_end = len(string) - # root.print_tree() - # place = Point(next, "", 0) - # print(place, place.index_here(), place.char_here()) - # place = skip_count(3, place) - # place = skip_count(4, place) - # place = skip_count(3, place) - # print(place, place.index_here(), place.char_here()) - # split_edge(place) - if __name__ == "__main__": - ukkonen("aaaab$") + # ukkonen("DEFDBEFFDDEFFFADEFFB") + ukkonen("abacabad") + print("done") # ukkonen("abcbcbc$") |
