diff options
| author | akiyamn | 2021-04-25 19:42:09 +1000 | 
|---|---|---|
| committer | akiyamn | 2021-04-25 19:42:09 +1000 | 
| commit | e347fd3246a4282d7fea85c1ae727e48c810480b (patch) | |
| tree | 4424fc641e6b49e5cef007f778fb617c15684e70 /ass2 | |
| parent | f1e1e2a64e30e93ce0d314f62855c16cb90e34c1 (diff) | |
| download | fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.tar.gz fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.zip | |
Ass 2: Maybe possibly Ukkonen's could be working probably
Diffstat (limited to 'ass2')
| -rw-r--r-- | ass2/q2/gen_cases.py | 104 | ||||
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 47 | 
2 files changed, 113 insertions, 38 deletions
| diff --git a/ass2/q2/gen_cases.py b/ass2/q2/gen_cases.py index f61222b..82d165e 100644 --- a/ass2/q2/gen_cases.py +++ b/ass2/q2/gen_cases.py @@ -1,15 +1,99 @@ -from itertools import product +import math +from itertools import product, accumulate, count, repeat +from functools import reduce  from new_suffix_array import ukkonen +import timeit +import random -MAX_LEN = 5 -NUM_LETTERS = 6 +def test(func, max_len, max_letters, num_iterations=math.inf): +    alphabet = "".join(map(chr, range(ord("a"), ord("a") + max_letters))) +    combos = map(lambda x: "".join(x), product(alphabet, repeat=max_len-1)) +    total_time = 0 +    longest = "", -1 +    i = 0 +    for i, string in enumerate(combos): +        print(string, end=": ") +        start = timeit.default_timer() +        func(string) +        stop = timeit.default_timer() - start +        print(f"{(stop * 10 ** 3):.6f}ms") +        if stop > longest[1]: +            longest = string, stop +        total_time += stop +        if i >= num_iterations: +            break +    print("**** TEST ENDED ****") +    print(f"Total time taken: {(total_time * 10):.6f}s = " +          f"{(total_time * 10 ** 6):.3f}μs") +    print(f"Longest string was '{longest[0]}' taking {(longest[1] * 10 ** 6):.3f}μs") +    if i > 0: +        print(f"Average time taken: {(total_time / i * 10):.6f}s = " +              f"{(total_time / i * 10 ** 6):.3f}μs") +        return total_time / i -alphabet = "".join(map(chr, range(ord("a"), ord("a") + NUM_LETTERS))) -combos = map(lambda x: "".join(x)+"$", product(alphabet, repeat=MAX_LEN)) -for i in range(100): -    try: -        ukkonen(next(combos)) -    except StopIteration: -        print("**** TEST ENDED ****")
\ No newline at end of file +# test(ukkonen, 50, 6) + +# if __name__ == "__main__": +#     for length in range(2, 10): +#         print(f"==== {length=} ====") +#         before = test(ukkonen, length, 6) +#         after = test(ukkonen, length*2, 6) +#         print(f"From {length} -> {length*2}: Ratio = {after/before}") + + +def random_string(length, variance=6): +    return "".join(map(chr, [random.randrange(ord("A"), ord("A") + variance) for _ in range(length)])) + + +def do_and_get_averages(func, length, num_times=100): +    times = [] +    # the_range = range(ord("A"), ord("A")+26+26+6) +    # letters = map(chr, the_range) +    for string in [random_string(length) for _ in range(num_times)]: +        average = 0 +        for i in range(5): +            # print(string, end=": ") +            start = timeit.default_timer() +            func(string) +            stop = timeit.default_timer() - start +            average += stop +            # print(f"{(stop * 10 ** 3):.6f}ms") +        average /= 5 +        times.append(average) +    return times + + +# for i in range(50, 100): +#     first = sum(do_and_get_averages(ukkonen, i, 100))/100 +#     second = sum(do_and_get_averages(ukkonen, i*2, 100))/100 +#     print(i, first, second, second/first) + +def random_test(func, length, interval=10000): +    i = 0 +    while True: +        func(random_string(length, 6)) +        i += 1 +        if i % interval == 0: +            print(i) + +random_test(ukkonen, 100) + +# times = [] +# # the_range = range(ord("A"), ord("A")+26+26+6) +# # letters = map(chr, the_range) +# for string in [random_string(20) for _ in range(100)]: +#     average = 0 +#     for i in range(20): +#         # print(string, end=": ") +#         start = timeit.default_timer() +#         ukkonen(string) +#         stop = timeit.default_timer() - start +#         average += stop +#         # print(f"{(stop * 10 ** 3):.6f}ms") +#     average /= 5 +#     times.append(average) +# print(times) +# for i in range(1, len(the_range)//2): +#     print(f"{i}:\t{times[i-1]}\t{times[(i-1)*2]}\t{times[(i-1)*2]/times[i-1]}") diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 3ec7af4..841ff67 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -141,14 +141,14 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):          curr_char = Node.string[i]          match = char_is_after(active, curr_char)          if match: -            print(3) +            # print(3)              remainder += 1              if node_just_created is not None:                  node_just_created.link = active.node              active = skip_count(1, active, i)              did_rule_three = True          else: -            print(2) +            # print(2)              if not active.is_explicit():                  mediator = split_edge(active)                  mediator.add_child(Node(i, "#")) @@ -156,6 +156,8 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):                      node_just_created.link = mediator                  node_just_created = mediator                  active.length -= 1 +                if active.length == 0: +                    active.set_node(active.node)              else:                  active.node.add_child(Node(i, "#"))                  if node_just_created is not None and node_just_created.link is None: @@ -163,10 +165,11 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):              remainder = pos(remainder - 1)              active.set_node(active.node.link)              if remainder > 0: -                active = skip_count(remainder, root_point, i - remainder) +                active = skip_count(remainder, Point(root), i - remainder)              last_j = j              j += 1 -        root.print_tree() +        # print(active) +        # root.print_tree()      return active, remainder, last_j @@ -177,7 +180,8 @@ def char_is_after(point: Point, char):          if point.length == point.edge_node.edge_length:              return Node.string[point.edge_node.start] == char          else:  # If not at the end of an edge -            return Node.string[point.index_here() + point.length] == char +            # return Node.string[point.index_here() + point.length] == char +            return Node.string[point.index_here() + 1] == char  def skip_count(num_chars, start_point: Point, index): @@ -198,7 +202,7 @@ def skip_count(num_chars, start_point: Point, index):      # Node.string[i] if head.node.root else Node.string[head.node.end_index + 1]      # assert head.node.end_index + 1 + chars_left < len(Node.string) -    while chars_left > incoming_length: +    while chars_left > 0:          # assert head.node.end_index + 1 + chars_left < len(Node.string)          direction = Node.string[index]          next_node = head.node.get_child(direction) @@ -211,45 +215,32 @@ def skip_count(num_chars, start_point: Point, index):          index += incoming_length          head.set_node(next_node) -    direction = Node.string[index] +    # direction = Node.string[index]      if chars_left > 0:  # Landed on an edge -        head.edge = direction +        head.edge = Node.string[index]          head.length = chars_left      return head  def ukkonen(string): +    string += "$"      Node.string = string -    # string += "$" +    Node.global_end = 0 +    Node.all_nodes.clear()      n = len(string)      remainder = 0      last_j = 1      root = create_root()      root.add_child(Node(0, "#")) -    link_pending = root -    # next = Node(4, 6) -    # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) -    # root.add_child(Node(1, 6))      active = Point(root) -    for i in range(1, len(string)): +    for i in range(1, n):          active, remainder, last_j = do_phase(root, active, i, last_j, remainder) -    place = Point(root, "a", 1) -    # print(char_is_after(place, "b")) - -    # Node.global_end = len(string) -    # root.print_tree() -    # place = Point(next, "", 0) -    # print(place, place.index_here(), place.char_here()) -    # place = skip_count(3, place) -    # place = skip_count(4, place) -    # place = skip_count(3, place) -    # print(place, place.index_here(), place.char_here()) -    # split_edge(place) -  if __name__ == "__main__": -    ukkonen("aaaab$") +    # ukkonen("DEFDBEFFDDEFFFADEFFB") +    ukkonen("abacabad") +    print("done")  # ukkonen("abcbcbc$") | 
