From d8bf4377d5c98a1a58f8ddc4dc5def8b1a7f0518 Mon Sep 17 00:00:00 2001 From: akiyamn Date: Fri, 23 Apr 2021 20:39:55 +1000 Subject: I assume splitting is done and skip counting is fixed --- ass2/q2/new_suffix_array.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'ass2') diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 0751198..259b1ef 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -46,7 +46,7 @@ class Node: return child def remove_child(self, child): - self.children.pop(child) + self.children.pop(child.first_char()) @property def end_index(self): @@ -97,10 +97,8 @@ class Point: return self.node.get_child(self.edge) def index_here(self): - if self.node.root: - return None if self.is_explicit(): - return self.node.start + return 0 if self.node.root else self.node.start return self.edge_node.start + self.length - 1 def char_here(self): @@ -112,6 +110,18 @@ def create_root(): return Node(None, None, root=True) +def split_edge(split_point: Point): + assert not split_point.is_explicit() + edge = split_point.edge_node + original = edge.tuple() + edge.detach() + mediator = Node(original[0], original[0] + split_point.length - 1) + edge.start = original[0] + split_point.length + assert edge.start <= edge.end_index + mediator.add_child(edge) + split_point.node.add_child(mediator) + return mediator + def do_phase(root: Node, active: Point, i, last_j): root_point = Point(root) Node.global_end += 1 @@ -120,6 +130,9 @@ def do_phase(root: Node, active: Point, i, last_j): while not did_rule_three: curr_char = Node.string[i] new_point = skip_count(i + 1, root_point) + if curr_char == active.char_here(): + pass + def char_is_after(point: Point, char): @@ -146,8 +159,9 @@ def skip_count(num_chars, start_point: Point): head.set_node(head.edge_node) chars_left -= incoming_length - direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] + assert head.node.end_index + 1 + chars_left < len(Node.string) while chars_left > incoming_length: + assert head.node.end_index + 1 + chars_left < len(Node.string) direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] next_node = head.node.get_child(direction) incoming_length = next_node.edge_length @@ -156,6 +170,7 @@ def skip_count(num_chars, start_point: Point): chars_left -= incoming_length head.set_node(next_node) + direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] if chars_left > 0: # Landed on an edge head.edge = direction @@ -163,7 +178,6 @@ def skip_count(num_chars, start_point: Point): return head - def ukkonen(string): Node.string = string # string += "$" @@ -171,12 +185,21 @@ def ukkonen(string): remainder = 0 last_j = 1 root = create_root() - root.add_child(Node(0, 3)).add_child(Node(4, 6)).add_child(Node(7, 7)).add_child(Node(8, "#")) + next = Node(4, 6) + root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) root.add_child(Node(1, 6)) active = Point(root) Node.global_end = len(string) root.print_tree() - print(skip_count(151, Point(root, "a", 2))) + place = Point(next, "", 0) + # print(place, place.index_here(), place.char_here()) + place = skip_count(3, place) + place = skip_count(4, place) + place = skip_count(3, place) + print(place, place.index_here(), place.char_here()) + # split_edge(place) + root.print_tree() + ukkonen("abcdefghijklmnop$") -- cgit v1.2.3