diff options
| field | value | date |
|---|---|---|
| author | akiyamn | 2021-04-25 19:42:09 +1000 |
| committer | akiyamn | 2021-04-25 19:42:09 +1000 |
| commit | e347fd3246a4282d7fea85c1ae727e48c810480b (patch) | |
| tree | 4424fc641e6b49e5cef007f778fb617c15684e70 /ass2/q2/new_suffix_array.py | |
| parent | f1e1e2a64e30e93ce0d314f62855c16cb90e34c1 (diff) | |
| download | fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.tar.gz fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.zip | |
Ass 2: Maybe possibly Ukkonen's could be working probably
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 47 |
1 files changed, 19 insertions, 28 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 3ec7af4..841ff67 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -141,14 +141,14 @@ def do_phase(root: Node, active: Point, i, last_j, remainder): curr_char = Node.string[i] match = char_is_after(active, curr_char) if match: - print(3) + # print(3) remainder += 1 if node_just_created is not None: node_just_created.link = active.node active = skip_count(1, active, i) did_rule_three = True else: - print(2) + # print(2) if not active.is_explicit(): mediator = split_edge(active) mediator.add_child(Node(i, "#")) @@ -156,6 +156,8 @@ def do_phase(root: Node, active: Point, i, last_j, remainder): node_just_created.link = mediator node_just_created = mediator active.length -= 1 + if active.length == 0: + active.set_node(active.node) else: active.node.add_child(Node(i, "#")) if node_just_created is not None and node_just_created.link is None: @@ -163,10 +165,11 @@ def do_phase(root: Node, active: Point, i, last_j, remainder): remainder = pos(remainder - 1) active.set_node(active.node.link) if remainder > 0: - active = skip_count(remainder, root_point, i - remainder) + active = skip_count(remainder, Point(root), i - remainder) last_j = j j += 1 - root.print_tree() + # print(active) + # root.print_tree() return active, remainder, last_j @@ -177,7 +180,8 @@ def char_is_after(point: Point, char): if point.length == point.edge_node.edge_length: return Node.string[point.edge_node.start] == char else: # If not at the end of an edge - return Node.string[point.index_here() + point.length] == char + # return Node.string[point.index_here() + point.length] == char + return Node.string[point.index_here() + 1] == char def skip_count(num_chars, start_point: Point, index): @@ -198,7 +202,7 @@ def skip_count(num_chars, start_point: Point, index): # Node.string[i] if head.node.root else Node.string[head.node.end_index + 1] # assert head.node.end_index + 1 + chars_left < len(Node.string) 
- while chars_left > incoming_length: + while chars_left > 0: # assert head.node.end_index + 1 + chars_left < len(Node.string) direction = Node.string[index] next_node = head.node.get_child(direction) @@ -211,45 +215,32 @@ def skip_count(num_chars, start_point: Point, index): index += incoming_length head.set_node(next_node) - direction = Node.string[index] + # direction = Node.string[index] if chars_left > 0: # Landed on an edge - head.edge = direction + head.edge = Node.string[index] head.length = chars_left return head def ukkonen(string): + string += "$" Node.string = string - # string += "$" + Node.global_end = 0 + Node.all_nodes.clear() n = len(string) remainder = 0 last_j = 1 root = create_root() root.add_child(Node(0, "#")) - link_pending = root - # next = Node(4, 6) - # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) - # root.add_child(Node(1, 6)) active = Point(root) - for i in range(1, len(string)): + for i in range(1, n): active, remainder, last_j = do_phase(root, active, i, last_j, remainder) - place = Point(root, "a", 1) - # print(char_is_after(place, "b")) - - # Node.global_end = len(string) - # root.print_tree() - # place = Point(next, "", 0) - # print(place, place.index_here(), place.char_here()) - # place = skip_count(3, place) - # place = skip_count(4, place) - # place = skip_count(3, place) - # print(place, place.index_here(), place.char_here()) - # split_edge(place) - if __name__ == "__main__": - ukkonen("aaaab$") + # ukkonen("DEFDBEFFDDEFFFADEFFB") + ukkonen("abacabad") + print("done") # ukkonen("abcbcbc$") |
