aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorakiyamn2021-04-24 12:16:41 +1000
committerakiyamn2021-04-24 12:16:41 +1000
commit4731bb568fc8f4a4b6d8524ec0853d16504c4583 (patch)
treea5a804856591d9c6f441b3178a714b52c0cba22f
parent16f55f173b98796f1f548a9505bbd25c05d65f98 (diff)
downloadfit3155-4731bb568fc8f4a4b6d8524ec0853d16504c4583.tar.gz
fit3155-4731bb568fc8f4a4b6d8524ec0853d16504c4583.zip
Ass 2: Ukkonen's kind of works?
-rw-r--r--ass2/q2/new_suffix_array.py63
1 files changed, 44 insertions, 19 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py
index 0bc5bd5..880f68e 100644
--- a/ass2/q2/new_suffix_array.py
+++ b/ass2/q2/new_suffix_array.py
@@ -6,8 +6,8 @@ class Node:
all_nodes = []
string = ""
- def __init__(self, start, end, root=False):
- self.root = root
+ def __init__(self, start, end):
+ self.root = False
self.start = start
self.end = end
self.children = {}
@@ -107,7 +107,10 @@ class Point:
def create_root():
assert len(Node.all_nodes) == 0
- return Node(None, None, root=True)
+ root = Node(None, None)
+ root.root = True
+ root.link = root
+ return root
def split_edge(split_point: Point):
@@ -122,27 +125,40 @@ def split_edge(split_point: Point):
split_point.node.add_child(mediator)
return mediator
-def do_phase(root: Node, active: Point, i, last_j, remainder):
+def pos(n: int):
+ return max(n, 0)
+
+def do_phase(root: Node, active: Point, i, last_j, remainder, need_link_to):
root_point = Point(root)
Node.global_end += 1
did_rule_three = False
j = last_j + 1
while not did_rule_three and j <= i+1:
+
curr_char = Node.string[i]
match = char_is_after(active, curr_char)
if match:
- active = skip_count(1, active)
+ print(3)
+ active = skip_count(1, active, i)
remainder += 1
did_rule_three = True
else:
+ print(2)
if not active.is_explicit():
mediator = split_edge(active)
mediator.add_child(Node(i, "#"))
+ mediator.link = need_link_to
+ need_link_to = mediator
+ active.length -= 1
else:
active.node.add_child(Node(i, "#"))
- remainder -= 1
- last_j = i + 1
+ remainder = pos(remainder-1)
+ active.set_node(active.node.link)
+ if remainder > 0:
+ active = skip_count(remainder, root_point, i-remainder)
+ last_j = j
j += 1
+ root.print_tree()
return active, remainder, last_j
@@ -151,12 +167,14 @@ def char_is_after(point: Point, char):
if point.is_explicit():
return char in point.node.children
else:
- if point.length < point.node.edge_length: # If not at the end of an edge
- return Node.string[point.index_here() + point.length]
- return None
+ if point.length == point.edge_node.edge_length:
+ return Node.string[point.edge_node.start] == char
+ else: # If not at the end of an edge
+ return Node.string[point.index_here() + point.length] == char
-def skip_count(num_chars, start_point: Point):
+
+def skip_count(num_chars, start_point: Point, index):
incoming_length = -1
existing_length = 0
head = start_point
@@ -170,21 +188,24 @@ def skip_count(num_chars, start_point: Point):
return head
head.set_node(head.edge_node)
chars_left -= incoming_length
+ index += incoming_length
+ # Node.string[i] if head.node.root else Node.string[head.node.end_index + 1]
# assert head.node.end_index + 1 + chars_left < len(Node.string)
while chars_left > incoming_length:
# assert head.node.end_index + 1 + chars_left < len(Node.string)
- direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1]
+ direction = Node.string[index]
next_node = head.node.get_child(direction)
if next_node is None:
- return None
+ raise Exception(f"Attempted to traverse char\n '{direction}' at point {head}. ({index=})")
incoming_length = next_node.edge_length
- if chars_left <= incoming_length:
+ if chars_left < incoming_length:
break
chars_left -= incoming_length
+ index += incoming_length
head.set_node(next_node)
- direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1]
+ direction = Node.string[index]
if chars_left > 0: # Landed on an edge
head.edge = direction
@@ -200,12 +221,17 @@ def ukkonen(string):
last_j = 1
root = create_root()
root.add_child(Node(0, "#"))
+ needs_link_to = root
# next = Node(4, 6)
# root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#"))
# root.add_child(Node(1, 6))
active = Point(root)
- for i in range(1, n):
- active, remainder, last_j = do_phase(root, active, i, last_j, remainder)
+ for i in range(1, 9):
+ active, remainder, last_j = do_phase(root, active, i, last_j, remainder, needs_link_to)
+
+ place = Point(root, "a", 1)
+ # print(char_is_after(place, "b"))
+
# Node.global_end = len(string)
# root.print_tree()
# place = Point(next, "", 0)
@@ -215,8 +241,7 @@ def ukkonen(string):
# place = skip_count(3, place)
# print(place, place.index_here(), place.char_here())
# split_edge(place)
- root.print_tree()
-ukkonen("abcdefghijklmnop$")
+ukkonen("abacabad$")