diff options
| author | akiyamn | 2021-04-24 14:22:03 +1000 |
|---|---|---|
| committer | akiyamn | 2021-04-24 14:22:03 +1000 |
| commit | f1e1e2a64e30e93ce0d314f62855c16cb90e34c1 (patch) | |
| tree | 193a4a5e2623f66955fa4322d4f0dbeb47bd9dce | |
| parent | 4731bb568fc8f4a4b6d8524ec0853d16504c4583 (diff) | |
| download | fit3155-f1e1e2a64e30e93ce0d314f62855c16cb90e34c1.tar.gz fit3155-f1e1e2a64e30e93ce0d314f62855c16cb90e34c1.zip | |
Ass 2: Maybe works, doing testing
| -rw-r--r-- | ass2/q2/gen_cases.py | 15 | ||||
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 36 | ||||
| -rw-r--r-- | ass2/q2/suffix_array.py | 3 |
3 files changed, 39 insertions, 15 deletions
diff --git a/ass2/q2/gen_cases.py b/ass2/q2/gen_cases.py new file mode 100644 index 0000000..f61222b --- /dev/null +++ b/ass2/q2/gen_cases.py @@ -0,0 +1,15 @@ +from itertools import product +from new_suffix_array import ukkonen + +MAX_LEN = 5 +NUM_LETTERS = 6 + + +alphabet = "".join(map(chr, range(ord("a"), ord("a") + NUM_LETTERS))) +combos = map(lambda x: "".join(x)+"$", product(alphabet, repeat=MAX_LEN)) + +for i in range(100): + try: + ukkonen(next(combos)) + except StopIteration: + print("**** TEST ENDED ****")
\ No newline at end of file diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 880f68e..3ec7af4 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -125,44 +125,51 @@ def split_edge(split_point: Point): split_point.node.add_child(mediator) return mediator + def pos(n: int): return max(n, 0) -def do_phase(root: Node, active: Point, i, last_j, remainder, need_link_to): + +def do_phase(root: Node, active: Point, i, last_j, remainder): root_point = Point(root) Node.global_end += 1 did_rule_three = False j = last_j + 1 - while not did_rule_three and j <= i+1: + node_just_created = None + while not did_rule_three and j <= i + 1: curr_char = Node.string[i] match = char_is_after(active, curr_char) if match: print(3) - active = skip_count(1, active, i) remainder += 1 + if node_just_created is not None: + node_just_created.link = active.node + active = skip_count(1, active, i) did_rule_three = True else: print(2) if not active.is_explicit(): mediator = split_edge(active) mediator.add_child(Node(i, "#")) - mediator.link = need_link_to - need_link_to = mediator + if node_just_created is not None: + node_just_created.link = mediator + node_just_created = mediator active.length -= 1 else: active.node.add_child(Node(i, "#")) - remainder = pos(remainder-1) + if node_just_created is not None and node_just_created.link is None: + node_just_created.link = active.node + remainder = pos(remainder - 1) active.set_node(active.node.link) if remainder > 0: - active = skip_count(remainder, root_point, i-remainder) + active = skip_count(remainder, root_point, i - remainder) last_j = j j += 1 root.print_tree() return active, remainder, last_j - def char_is_after(point: Point, char): if point.is_explicit(): return char in point.node.children @@ -173,7 +180,6 @@ def char_is_after(point: Point, char): return Node.string[point.index_here() + point.length] == char - def skip_count(num_chars, start_point: Point, index): incoming_length = -1 existing_length = 0 @@ -213,6 +219,7 @@ def skip_count(num_chars, start_point: Point, index): return head + def ukkonen(string): Node.string = string # string += "$" @@ -221,13 +228,13 @@ def ukkonen(string): last_j = 1 root = create_root() root.add_child(Node(0, "#")) - needs_link_to = root + link_pending = root # next = Node(4, 6) # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) # root.add_child(Node(1, 6)) active = Point(root) - for i in range(1, 9): - active, remainder, last_j = do_phase(root, active, i, last_j, remainder, needs_link_to) + for i in range(1, len(string)): + active, remainder, last_j = do_phase(root, active, i, last_j, remainder) place = Point(root, "a", 1) # print(char_is_after(place, "b")) @@ -243,5 +250,6 @@ def ukkonen(string): # split_edge(place) - -ukkonen("abacabad$") +if __name__ == "__main__": + ukkonen("aaaab$") +# ukkonen("abcbcbc$") diff --git a/ass2/q2/suffix_array.py b/ass2/q2/suffix_array.py index eccc99e..61ac445 100644 --- a/ass2/q2/suffix_array.py +++ b/ass2/q2/suffix_array.py @@ -197,4 +197,5 @@ def suffix_tree(string): print("\n" * 10) -suffix_tree("abcabxabcd") +if __name__ == "__main__": + suffix_tree("abcabxabcd") |
