diff options
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 36 |
1 files changed, 22 insertions, 14 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 880f68e..3ec7af4 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -125,44 +125,51 @@ def split_edge(split_point: Point): split_point.node.add_child(mediator) return mediator + def pos(n: int): return max(n, 0) -def do_phase(root: Node, active: Point, i, last_j, remainder, need_link_to): + +def do_phase(root: Node, active: Point, i, last_j, remainder): root_point = Point(root) Node.global_end += 1 did_rule_three = False j = last_j + 1 - while not did_rule_three and j <= i+1: + node_just_created = None + while not did_rule_three and j <= i + 1: curr_char = Node.string[i] match = char_is_after(active, curr_char) if match: print(3) - active = skip_count(1, active, i) remainder += 1 + if node_just_created is not None: + node_just_created.link = active.node + active = skip_count(1, active, i) did_rule_three = True else: print(2) if not active.is_explicit(): mediator = split_edge(active) mediator.add_child(Node(i, "#")) - mediator.link = need_link_to - need_link_to = mediator + if node_just_created is not None: + node_just_created.link = mediator + node_just_created = mediator active.length -= 1 else: active.node.add_child(Node(i, "#")) - remainder = pos(remainder-1) + if node_just_created is not None and node_just_created.link is None: + node_just_created.link = active.node + remainder = pos(remainder - 1) active.set_node(active.node.link) if remainder > 0: - active = skip_count(remainder, root_point, i-remainder) + active = skip_count(remainder, root_point, i - remainder) last_j = j j += 1 root.print_tree() return active, remainder, last_j - def char_is_after(point: Point, char): if point.is_explicit(): return char in point.node.children @@ -173,7 +180,6 @@ def char_is_after(point: Point, char): return Node.string[point.index_here() + point.length] == char - def skip_count(num_chars, start_point: Point, index): incoming_length = -1 existing_length = 0 @@ -213,6 +219,7 @@ def skip_count(num_chars, start_point: Point, index): return head + def ukkonen(string): Node.string = string # string += "$" @@ -221,13 +228,13 @@ def ukkonen(string): last_j = 1 root = create_root() root.add_child(Node(0, "#")) - needs_link_to = root + link_pending = root # next = Node(4, 6) # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) # root.add_child(Node(1, 6)) active = Point(root) - for i in range(1, 9): - active, remainder, last_j = do_phase(root, active, i, last_j, remainder, needs_link_to) + for i in range(1, len(string)): + active, remainder, last_j = do_phase(root, active, i, last_j, remainder) place = Point(root, "a", 1) # print(char_is_after(place, "b")) @@ -243,5 +250,6 @@ def ukkonen(string): # split_edge(place) - -ukkonen("abacabad$") +if __name__ == "__main__": + ukkonen("aaaab$") +# ukkonen("abcbcbc$") |
