aboutsummaryrefslogtreecommitdiff
path: root/ass2/q2/new_suffix_array.py
diff options
context:
space:
mode:
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
-rw-r--r--ass2/q2/new_suffix_array.py36
1 files changed, 22 insertions, 14 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py
index 880f68e..3ec7af4 100644
--- a/ass2/q2/new_suffix_array.py
+++ b/ass2/q2/new_suffix_array.py
@@ -125,44 +125,51 @@ def split_edge(split_point: Point):
split_point.node.add_child(mediator)
return mediator
+
def pos(n: int):
return max(n, 0)
-def do_phase(root: Node, active: Point, i, last_j, remainder, need_link_to):
+
+def do_phase(root: Node, active: Point, i, last_j, remainder):
root_point = Point(root)
Node.global_end += 1
did_rule_three = False
j = last_j + 1
- while not did_rule_three and j <= i+1:
+ node_just_created = None
+ while not did_rule_three and j <= i + 1:
curr_char = Node.string[i]
match = char_is_after(active, curr_char)
if match:
print(3)
- active = skip_count(1, active, i)
remainder += 1
+ if node_just_created is not None:
+ node_just_created.link = active.node
+ active = skip_count(1, active, i)
did_rule_three = True
else:
print(2)
if not active.is_explicit():
mediator = split_edge(active)
mediator.add_child(Node(i, "#"))
- mediator.link = need_link_to
- need_link_to = mediator
+ if node_just_created is not None:
+ node_just_created.link = mediator
+ node_just_created = mediator
active.length -= 1
else:
active.node.add_child(Node(i, "#"))
- remainder = pos(remainder-1)
+ if node_just_created is not None and node_just_created.link is None:
+ node_just_created.link = active.node
+ remainder = pos(remainder - 1)
active.set_node(active.node.link)
if remainder > 0:
- active = skip_count(remainder, root_point, i-remainder)
+ active = skip_count(remainder, root_point, i - remainder)
last_j = j
j += 1
root.print_tree()
return active, remainder, last_j
-
def char_is_after(point: Point, char):
if point.is_explicit():
return char in point.node.children
@@ -173,7 +180,6 @@ def char_is_after(point: Point, char):
return Node.string[point.index_here() + point.length] == char
-
def skip_count(num_chars, start_point: Point, index):
incoming_length = -1
existing_length = 0
@@ -213,6 +219,7 @@ def skip_count(num_chars, start_point: Point, index):
return head
+
def ukkonen(string):
Node.string = string
# string += "$"
@@ -221,13 +228,13 @@ def ukkonen(string):
last_j = 1
root = create_root()
root.add_child(Node(0, "#"))
- needs_link_to = root
+ link_pending = root
# next = Node(4, 6)
# root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#"))
# root.add_child(Node(1, 6))
active = Point(root)
- for i in range(1, 9):
- active, remainder, last_j = do_phase(root, active, i, last_j, remainder, needs_link_to)
+ for i in range(1, len(string)):
+ active, remainder, last_j = do_phase(root, active, i, last_j, remainder)
place = Point(root, "a", 1)
# print(char_is_after(place, "b"))
@@ -243,5 +250,6 @@ def ukkonen(string):
# split_edge(place)
-
-ukkonen("abacabad$")
+if __name__ == "__main__":
+ ukkonen("aaaab$")
+# ukkonen("abcbcbc$")