diff options
| author | akiyamn | 2021-04-23 21:28:41 +1000 |
|---|---|---|
| committer | akiyamn | 2021-04-23 21:28:41 +1000 |
| commit | 16f55f173b98796f1f548a9505bbd25c05d65f98 (patch) | |
| tree | dcbb63b372e6d763ac1ef88e6164a7de38335c6d /ass2/q2/new_suffix_array.py | |
| parent | d8bf4377d5c98a1a58f8ddc4dc5def8b1a7f0518 (diff) | |
| download | fit3155-16f55f173b98796f1f548a9505bbd25c05d65f98.tar.gz fit3155-16f55f173b98796f1f548a9505bbd25c05d65f98.zip | |
Ass 2: Rule 2 maybe working?
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 51 |
1 files changed, 34 insertions, 17 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 259b1ef..0bc5bd5 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -122,16 +122,28 @@ def split_edge(split_point: Point): split_point.node.add_child(mediator) return mediator -def do_phase(root: Node, active: Point, i, last_j): +def do_phase(root: Node, active: Point, i, last_j, remainder): root_point = Point(root) Node.global_end += 1 did_rule_three = False j = last_j + 1 - while not did_rule_three: + while not did_rule_three and j <= i+1: curr_char = Node.string[i] - new_point = skip_count(i + 1, root_point) - if curr_char == active.char_here(): - pass + match = char_is_after(active, curr_char) + if match: + active = skip_count(1, active) + remainder += 1 + did_rule_three = True + else: + if not active.is_explicit(): + mediator = split_edge(active) + mediator.add_child(Node(i, "#")) + else: + active.node.add_child(Node(i, "#")) + remainder -= 1 + last_j = i + 1 + j += 1 + return active, remainder, last_j @@ -159,11 +171,13 @@ def skip_count(num_chars, start_point: Point): head.set_node(head.edge_node) chars_left -= incoming_length - assert head.node.end_index + 1 + chars_left < len(Node.string) + # assert head.node.end_index + 1 + chars_left < len(Node.string) while chars_left > incoming_length: - assert head.node.end_index + 1 + chars_left < len(Node.string) + # assert head.node.end_index + 1 + chars_left < len(Node.string) direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] next_node = head.node.get_child(direction) + if next_node is None: + return None incoming_length = next_node.edge_length if chars_left <= incoming_length: break @@ -185,18 +199,21 @@ def ukkonen(string): remainder = 0 last_j = 1 root = create_root() - next = Node(4, 6) - root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) - root.add_child(Node(1, 6)) + root.add_child(Node(0, "#")) + # next = Node(4, 6) + # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) + # root.add_child(Node(1, 6)) active = Point(root) - Node.global_end = len(string) - root.print_tree() - place = Point(next, "", 0) + for i in range(1, n): + active, remainder, last_j = do_phase(root, active, i, last_j, remainder) + # Node.global_end = len(string) + # root.print_tree() + # place = Point(next, "", 0) + # print(place, place.index_here(), place.char_here()) + # place = skip_count(3, place) + # place = skip_count(4, place) + # place = skip_count(3, place) # print(place, place.index_here(), place.char_here()) - place = skip_count(3, place) - place = skip_count(4, place) - place = skip_count(3, place) - print(place, place.index_here(), place.char_here()) # split_edge(place) root.print_tree() |
