aboutsummaryrefslogtreecommitdiff
path: root/ass2/q2/new_suffix_array.py
diff options
context:
space:
mode:
authorakiyamn2021-04-25 19:42:09 +1000
committerakiyamn2021-04-25 19:42:09 +1000
commite347fd3246a4282d7fea85c1ae727e48c810480b (patch)
tree4424fc641e6b49e5cef007f778fb617c15684e70 /ass2/q2/new_suffix_array.py
parentf1e1e2a64e30e93ce0d314f62855c16cb90e34c1 (diff)
downloadfit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.tar.gz
fit3155-e347fd3246a4282d7fea85c1ae727e48c810480b.zip
Ass 2: Maybe possibly Ukkonen's could be working probably
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
-rw-r--r--ass2/q2/new_suffix_array.py47
1 files changed, 19 insertions, 28 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py
index 3ec7af4..841ff67 100644
--- a/ass2/q2/new_suffix_array.py
+++ b/ass2/q2/new_suffix_array.py
@@ -141,14 +141,14 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):
curr_char = Node.string[i]
match = char_is_after(active, curr_char)
if match:
- print(3)
+ # print(3)
remainder += 1
if node_just_created is not None:
node_just_created.link = active.node
active = skip_count(1, active, i)
did_rule_three = True
else:
- print(2)
+ # print(2)
if not active.is_explicit():
mediator = split_edge(active)
mediator.add_child(Node(i, "#"))
@@ -156,6 +156,8 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):
node_just_created.link = mediator
node_just_created = mediator
active.length -= 1
+ if active.length == 0:
+ active.set_node(active.node)
else:
active.node.add_child(Node(i, "#"))
if node_just_created is not None and node_just_created.link is None:
@@ -163,10 +165,11 @@ def do_phase(root: Node, active: Point, i, last_j, remainder):
remainder = pos(remainder - 1)
active.set_node(active.node.link)
if remainder > 0:
- active = skip_count(remainder, root_point, i - remainder)
+ active = skip_count(remainder, Point(root), i - remainder)
last_j = j
j += 1
- root.print_tree()
+ # print(active)
+ # root.print_tree()
return active, remainder, last_j
@@ -177,7 +180,8 @@ def char_is_after(point: Point, char):
if point.length == point.edge_node.edge_length:
return Node.string[point.edge_node.start] == char
else: # If not at the end of an edge
- return Node.string[point.index_here() + point.length] == char
+ # return Node.string[point.index_here() + point.length] == char
+ return Node.string[point.index_here() + 1] == char
def skip_count(num_chars, start_point: Point, index):
@@ -198,7 +202,7 @@ def skip_count(num_chars, start_point: Point, index):
# Node.string[i] if head.node.root else Node.string[head.node.end_index + 1]
# assert head.node.end_index + 1 + chars_left < len(Node.string)
- while chars_left > incoming_length:
+ while chars_left > 0:
# assert head.node.end_index + 1 + chars_left < len(Node.string)
direction = Node.string[index]
next_node = head.node.get_child(direction)
@@ -211,45 +215,32 @@ def skip_count(num_chars, start_point: Point, index):
index += incoming_length
head.set_node(next_node)
- direction = Node.string[index]
+ # direction = Node.string[index]
if chars_left > 0: # Landed on an edge
- head.edge = direction
+ head.edge = Node.string[index]
head.length = chars_left
return head
def ukkonen(string):
+ string += "$"
Node.string = string
- # string += "$"
+ Node.global_end = 0
+ Node.all_nodes.clear()
n = len(string)
remainder = 0
last_j = 1
root = create_root()
root.add_child(Node(0, "#"))
- link_pending = root
- # next = Node(4, 6)
- # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#"))
- # root.add_child(Node(1, 6))
active = Point(root)
- for i in range(1, len(string)):
+ for i in range(1, n):
active, remainder, last_j = do_phase(root, active, i, last_j, remainder)
- place = Point(root, "a", 1)
- # print(char_is_after(place, "b"))
-
- # Node.global_end = len(string)
- # root.print_tree()
- # place = Point(next, "", 0)
- # print(place, place.index_here(), place.char_here())
- # place = skip_count(3, place)
- # place = skip_count(4, place)
- # place = skip_count(3, place)
- # print(place, place.index_here(), place.char_here())
- # split_edge(place)
-
if __name__ == "__main__":
- ukkonen("aaaab$")
+ # ukkonen("DEFDBEFFDDEFFFADEFFB")
+ ukkonen("abacabad")
+ print("done")
# ukkonen("abcbcbc$")