diff options
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 63 | 
1 files changed, 44 insertions, 19 deletions
| diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 0bc5bd5..880f68e 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -6,8 +6,8 @@ class Node:      all_nodes = []      string = "" -    def __init__(self, start, end, root=False): -        self.root = root +    def __init__(self, start, end): +        self.root = False          self.start = start          self.end = end          self.children = {} @@ -107,7 +107,10 @@ class Point:  def create_root():      assert len(Node.all_nodes) == 0 -    return Node(None, None, root=True) +    root = Node(None, None) +    root.root = True +    root.link = root +    return root  def split_edge(split_point: Point): @@ -122,27 +125,40 @@ def split_edge(split_point: Point):      split_point.node.add_child(mediator)      return mediator -def do_phase(root: Node, active: Point, i, last_j, remainder): +def pos(n: int): +    return max(n, 0) + +def do_phase(root: Node, active: Point, i, last_j, remainder, need_link_to):      root_point = Point(root)      Node.global_end += 1      did_rule_three = False      j = last_j + 1      while not did_rule_three and j <= i+1: +          curr_char = Node.string[i]          match = char_is_after(active, curr_char)          if match: -            active = skip_count(1, active) +            print(3) +            active = skip_count(1, active, i)              remainder += 1              did_rule_three = True          else: +            print(2)              if not active.is_explicit():                  mediator = split_edge(active)                  mediator.add_child(Node(i, "#")) +                mediator.link = need_link_to +                need_link_to = mediator +                active.length -= 1              else:                  active.node.add_child(Node(i, "#")) -            remainder -= 1 -            last_j = i + 1 +            remainder = pos(remainder-1) +            active.set_node(active.node.link) +            if remainder > 0: +                active = skip_count(remainder, root_point, i-remainder) +            last_j = j              j += 1 +        root.print_tree()      return active, remainder, last_j @@ -151,12 +167,14 @@ def char_is_after(point: Point, char):      if point.is_explicit():          return char in point.node.children      else: -        if point.length < point.node.edge_length:  # If not at the end of an edge -            return Node.string[point.index_here() + point.length] -        return None +        if point.length == point.edge_node.edge_length: +            return Node.string[point.edge_node.start] == char +        else:  # If not at the end of an edge +            return Node.string[point.index_here() + point.length] == char -def skip_count(num_chars, start_point: Point): + +def skip_count(num_chars, start_point: Point, index):      incoming_length = -1      existing_length = 0      head = start_point @@ -170,21 +188,24 @@ def skip_count(num_chars, start_point: Point):              return head          head.set_node(head.edge_node)          chars_left -= incoming_length +        index += incoming_length +    # Node.string[i] if head.node.root else Node.string[head.node.end_index + 1]      # assert head.node.end_index + 1 + chars_left < len(Node.string)      while chars_left > incoming_length:          # assert head.node.end_index + 1 + chars_left < len(Node.string) -        direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] +        direction = Node.string[index]          next_node = head.node.get_child(direction)          if next_node is None: -            return None +            raise Exception(f"Attempted to traverse char\n '{direction}' at point {head}. ({index=})")          incoming_length = next_node.edge_length -        if chars_left <= incoming_length: +        if chars_left < incoming_length:              break          chars_left -= incoming_length +        index += incoming_length          head.set_node(next_node) -    direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] +    direction = Node.string[index]      if chars_left > 0:  # Landed on an edge          head.edge = direction @@ -200,12 +221,17 @@ def ukkonen(string):      last_j = 1      root = create_root()      root.add_child(Node(0, "#")) +    needs_link_to = root      # next = Node(4, 6)      # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#"))      # root.add_child(Node(1, 6))      active = Point(root) -    for i in range(1, n): -        active, remainder, last_j = do_phase(root, active, i, last_j, remainder) +    for i in range(1, 9): +        active, remainder, last_j = do_phase(root, active, i, last_j, remainder, needs_link_to) + +    place = Point(root, "a", 1) +    # print(char_is_after(place, "b")) +      # Node.global_end = len(string)      # root.print_tree()      # place = Point(next, "", 0) @@ -215,8 +241,7 @@ def ukkonen(string):      # place = skip_count(3, place)      # print(place, place.index_here(), place.char_here())      # split_edge(place) -    root.print_tree() -ukkonen("abcdefghijklmnop$") +ukkonen("abacabad$") | 
