import sys class Node: global_end = 0 all_nodes = [] string = "" def __init__(self, start, end, root=False): self.root = root self.start = start self.end = end self.children = {} self.id = len(self.all_nodes) self.all_nodes.append(self) self.parent = None self.link = None def __str__(self): link_str = "" if self.link is None else f" -> {self.link.id}" if not self.root: j, i = self.tuple() return f"[{self.id}, {self.tuple()}, {self.string[j:i + 1]}{link_str}]" return f"[{self.id} root{link_str}]" def __repr__(self): return f"[{self.id}]" def print_tree(self, spaces=1): print(f"{self}") for edge in self.children: print(f" " * spaces, end="") self.get_child(edge).print_tree(spaces=spaces + 1) def first_char(self): return self.string[self.start] def get_child(self, char): if char in self.children: return self.children[char] return None def add_child(self, child): child.parent = self self.children[child.first_char()] = child return child def remove_child(self, child): self.children.pop(child) @property def end_index(self): return self.tuple()[1] def tuple(self): if self.root: raise Exception("Can't get substring of root.") if self.end == "#": return self.start, self.global_end return self.start, self.end @property def edge_length(self): if self.root: return 0 else: start, end = self.tuple() return end - start + 1 def detach(self): self.parent.remove_child(self) self.parent = None class Point: def __init__(self, node, edge="", length=0): assert isinstance(node, Node) self.node = node self.edge = edge self.length = length def __repr__(self): return f"(Node {self.node.id}'s edge:'{self.edge}', {self.length} along.)" def is_explicit(self): # a.k.a. is not on an edge return self.edge == "" def set_node(self, node): self.node = node self.edge = "" self.length = 0 if not self.is_explicit(): print("WARNING: Node.set_node", file=sys.stderr) @property def edge_node(self) -> Node: return self.node.get_child(self.edge) def index_here(self): if self.node.root: return None if self.is_explicit(): return self.node.start return self.edge_node.start + self.length - 1 def char_here(self): return Node.string[self.index_here()] def create_root(): assert len(Node.all_nodes) == 0 return Node(None, None, root=True) def do_phase(root: Node, active: Point, i, last_j): root_point = Point(root) Node.global_end += 1 did_rule_three = False j = last_j + 1 while not did_rule_three: curr_char = Node.string[i] new_point = skip_count(i + 1, root_point) def char_is_after(point: Point, char): if point.is_explicit(): return char in point.node.children else: if point.length < point.node.edge_length: # If not at the end of an edge return Node.string[point.index_here() + point.length] return None def skip_count(num_chars, start_point: Point): incoming_length = -1 existing_length = 0 head = start_point chars_left = num_chars char = "" if not head.is_explicit(): incoming_length = head.edge_node.edge_length - head.length if num_chars < incoming_length: head.length += num_chars return head head.set_node(head.edge_node) chars_left -= incoming_length direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] while chars_left > incoming_length: direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] next_node = head.node.get_child(direction) incoming_length = next_node.edge_length if chars_left <= incoming_length: break chars_left -= incoming_length head.set_node(next_node) if chars_left > 0: # Landed on an edge head.edge = direction head.length = chars_left return head def ukkonen(string): Node.string = string # string += "$" n = len(string) remainder = 0 last_j = 1 root = create_root() root.add_child(Node(0, 3)).add_child(Node(4, 6)).add_child(Node(7, 7)).add_child(Node(8, "#")) root.add_child(Node(1, 6)) active = Point(root) Node.global_end = len(string) root.print_tree() print(skip_count(151, Point(root, "a", 2))) ukkonen("abcdefghijklmnop$")