1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
|
import sys
class Node:
global_end = 0
all_nodes = []
string = ""
def __init__(self, start, end, root=False):
self.root = root
self.start = start
self.end = end
self.children = {}
self.id = len(self.all_nodes)
self.all_nodes.append(self)
self.parent = None
self.link = None
def __str__(self):
link_str = "" if self.link is None else f" -> {self.link.id}"
if not self.root:
j, i = self.tuple()
return f"[{self.id}, {self.tuple()}, {self.string[j:i + 1]}{link_str}]"
return f"[{self.id} root{link_str}]"
def __repr__(self):
return f"[{self.id}]"
def print_tree(self, spaces=1):
print(f"{self}")
for edge in self.children:
print(f" " * spaces, end="")
self.get_child(edge).print_tree(spaces=spaces + 1)
def first_char(self):
return self.string[self.start]
def get_child(self, char):
if char in self.children:
return self.children[char]
return None
def add_child(self, child):
child.parent = self
self.children[child.first_char()] = child
return child
def remove_child(self, child):
self.children.pop(child)
@property
def end_index(self):
return self.tuple()[1]
def tuple(self):
if self.root:
raise Exception("Can't get substring of root.")
if self.end == "#":
return self.start, self.global_end
return self.start, self.end
@property
def edge_length(self):
if self.root:
return 0
else:
start, end = self.tuple()
return end - start + 1
def detach(self):
self.parent.remove_child(self)
self.parent = None
class Point:
def __init__(self, node, edge="", length=0):
assert isinstance(node, Node)
self.node = node
self.edge = edge
self.length = length
def __repr__(self):
return f"(Node {self.node.id}'s edge:'{self.edge}', {self.length} along.)"
def is_explicit(self): # a.k.a. is not on an edge
return self.edge == ""
def set_node(self, node):
self.node = node
self.edge = ""
self.length = 0
if not self.is_explicit():
print("WARNING: Node.set_node", file=sys.stderr)
@property
def edge_node(self) -> Node:
return self.node.get_child(self.edge)
def index_here(self):
if self.node.root:
return None
if self.is_explicit():
return self.node.start
return self.edge_node.start + self.length - 1
def char_here(self):
return Node.string[self.index_here()]
def create_root():
assert len(Node.all_nodes) == 0
return Node(None, None, root=True)
def do_phase(root: Node, active: Point, i, last_j):
root_point = Point(root)
Node.global_end += 1
did_rule_three = False
j = last_j + 1
while not did_rule_three:
curr_char = Node.string[i]
new_point = skip_count(i + 1, root_point)
def char_is_after(point: Point, char):
if point.is_explicit():
return char in point.node.children
else:
if point.length < point.node.edge_length: # If not at the end of an edge
return Node.string[point.index_here() + point.length]
return None
def skip_count(num_chars, start_point: Point):
incoming_length = -1
existing_length = 0
head = start_point
chars_left = num_chars
char = ""
if not head.is_explicit():
incoming_length = head.edge_node.edge_length - head.length
if num_chars < incoming_length:
head.length += num_chars
return head
head.set_node(head.edge_node)
chars_left -= incoming_length
direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1]
# next_node = head.node.get_child(direction)
# incoming_length = next_node.edge_length
#
# chars_left -= incoming_length
# head.set_node(next_node)
while chars_left > incoming_length:
direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1]
next_node = head.node.get_child(direction)
incoming_length = next_node.edge_length
if chars_left <= incoming_length:
break
chars_left -= incoming_length
head.set_node(next_node)
if chars_left > 0: # Landed on an edge
head.edge = direction
head.length = chars_left
return head
def ukkonen(string):
Node.string = string
# string += "$"
n = len(string)
remainder = 0
last_j = 1
root = create_root()
root.add_child(Node(0, 3)).add_child(Node(4, 6)).add_child(Node(7, 7)).add_child(Node(8, "#"))
root.add_child(Node(1, 6))
active = Point(root)
Node.global_end = len(string)
root.print_tree()
print(skip_count(151, Point(root, "a", 2)))
ukkonen("abcdefghijklmnop$")
|