diff options
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 39 | 
1 files changed, 31 insertions, 8 deletions
| diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 0751198..259b1ef 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -46,7 +46,7 @@ class Node:          return child      def remove_child(self, child): -        self.children.pop(child) +        self.children.pop(child.first_char())      @property      def end_index(self): @@ -97,10 +97,8 @@ class Point:          return self.node.get_child(self.edge)      def index_here(self): -        if self.node.root: -            return None          if self.is_explicit(): -            return self.node.start +            return 0 if self.node.root else self.node.start          return self.edge_node.start + self.length - 1      def char_here(self): @@ -112,6 +110,18 @@ def create_root():      return Node(None, None, root=True) +def split_edge(split_point: Point): +    assert not split_point.is_explicit() +    edge = split_point.edge_node +    original = edge.tuple() +    edge.detach() +    mediator = Node(original[0], original[0] + split_point.length - 1) +    edge.start = original[0] + split_point.length +    assert edge.start <= edge.end_index +    mediator.add_child(edge) +    split_point.node.add_child(mediator) +    return mediator +  def do_phase(root: Node, active: Point, i, last_j):      root_point = Point(root)      Node.global_end += 1 @@ -120,6 +130,9 @@ def do_phase(root: Node, active: Point, i, last_j):      while not did_rule_three:          curr_char = Node.string[i]          new_point = skip_count(i + 1, root_point) +        if curr_char == active.char_here(): +            pass +  def char_is_after(point: Point, char): @@ -146,8 +159,9 @@ def skip_count(num_chars, start_point: Point):          head.set_node(head.edge_node)          chars_left -= incoming_length -    direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] +    assert head.node.end_index + 1 + chars_left < len(Node.string)      while chars_left > incoming_length: +        assert head.node.end_index + 1 + chars_left < len(Node.string)          direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1]          next_node = head.node.get_child(direction)          incoming_length = next_node.edge_length @@ -156,6 +170,7 @@ def skip_count(num_chars, start_point: Point):          chars_left -= incoming_length          head.set_node(next_node) +    direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1]      if chars_left > 0:  # Landed on an edge          head.edge = direction @@ -163,7 +178,6 @@ def skip_count(num_chars, start_point: Point):      return head -  def ukkonen(string):      Node.string = string      # string += "$" @@ -171,12 +185,21 @@ def ukkonen(string):      remainder = 0      last_j = 1      root = create_root() -    root.add_child(Node(0, 3)).add_child(Node(4, 6)).add_child(Node(7, 7)).add_child(Node(8, "#")) +    next = Node(4, 6) +    root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#"))      root.add_child(Node(1, 6))      active = Point(root)      Node.global_end = len(string)      root.print_tree() -    print(skip_count(151, Point(root, "a", 2))) +    place = Point(next, "", 0) +    # print(place, place.index_here(), place.char_here()) +    place = skip_count(3, place) +    place = skip_count(4, place) +    place = skip_count(3, place) +    print(place, place.index_here(), place.char_here()) +    # split_edge(place) +    root.print_tree() +  ukkonen("abcdefghijklmnop$") | 
