diff options
| author | akiyamn | 2021-04-24 12:16:41 +1000 |
|---|---|---|
| committer | akiyamn | 2021-04-24 12:16:41 +1000 |
| commit | 4731bb568fc8f4a4b6d8524ec0853d16504c4583 (patch) | |
| tree | a5a804856591d9c6f441b3178a714b52c0cba22f /ass2/q2/new_suffix_array.py | |
| parent | 16f55f173b98796f1f548a9505bbd25c05d65f98 (diff) | |
| download | fit3155-4731bb568fc8f4a4b6d8524ec0853d16504c4583.tar.gz fit3155-4731bb568fc8f4a4b6d8524ec0853d16504c4583.zip | |
Ass 2: Ukkonen's kind of works?
Diffstat (limited to 'ass2/q2/new_suffix_array.py')
| -rw-r--r-- | ass2/q2/new_suffix_array.py | 63 |
1 files changed, 44 insertions, 19 deletions
diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 0bc5bd5..880f68e 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -6,8 +6,8 @@ class Node: all_nodes = [] string = "" - def __init__(self, start, end, root=False): - self.root = root + def __init__(self, start, end): + self.root = False self.start = start self.end = end self.children = {} @@ -107,7 +107,10 @@ class Point: def create_root(): assert len(Node.all_nodes) == 0 - return Node(None, None, root=True) + root = Node(None, None) + root.root = True + root.link = root + return root def split_edge(split_point: Point): @@ -122,27 +125,40 @@ def split_edge(split_point: Point): split_point.node.add_child(mediator) return mediator -def do_phase(root: Node, active: Point, i, last_j, remainder): +def pos(n: int): + return max(n, 0) + +def do_phase(root: Node, active: Point, i, last_j, remainder, need_link_to): root_point = Point(root) Node.global_end += 1 did_rule_three = False j = last_j + 1 while not did_rule_three and j <= i+1: + curr_char = Node.string[i] match = char_is_after(active, curr_char) if match: - active = skip_count(1, active) + print(3) + active = skip_count(1, active, i) remainder += 1 did_rule_three = True else: + print(2) if not active.is_explicit(): mediator = split_edge(active) mediator.add_child(Node(i, "#")) + mediator.link = need_link_to + need_link_to = mediator + active.length -= 1 else: active.node.add_child(Node(i, "#")) - remainder -= 1 - last_j = i + 1 + remainder = pos(remainder-1) + active.set_node(active.node.link) + if remainder > 0: + active = skip_count(remainder, root_point, i-remainder) + last_j = j j += 1 + root.print_tree() return active, remainder, last_j @@ -151,12 +167,14 @@ def char_is_after(point: Point, char): if point.is_explicit(): return char in point.node.children else: - if point.length < point.node.edge_length: # If not at the end of an edge - return Node.string[point.index_here() + point.length] - return None + if point.length == point.edge_node.edge_length: + return Node.string[point.edge_node.start] == char + else: # If not at the end of an edge + return Node.string[point.index_here() + point.length] == char -def skip_count(num_chars, start_point: Point): + +def skip_count(num_chars, start_point: Point, index): incoming_length = -1 existing_length = 0 head = start_point @@ -170,21 +188,24 @@ def skip_count(num_chars, start_point: Point): return head head.set_node(head.edge_node) chars_left -= incoming_length + index += incoming_length + # Node.string[i] if head.node.root else Node.string[head.node.end_index + 1] # assert head.node.end_index + 1 + chars_left < len(Node.string) while chars_left > incoming_length: # assert head.node.end_index + 1 + chars_left < len(Node.string) - direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] + direction = Node.string[index] next_node = head.node.get_child(direction) if next_node is None: - return None + raise Exception(f"Attempted to traverse char\n '{direction}' at point {head}. ({index=})") incoming_length = next_node.edge_length - if chars_left <= incoming_length: + if chars_left < incoming_length: break chars_left -= incoming_length + index += incoming_length head.set_node(next_node) - direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] + direction = Node.string[index] if chars_left > 0: # Landed on an edge head.edge = direction @@ -200,12 +221,17 @@ def ukkonen(string): last_j = 1 root = create_root() root.add_child(Node(0, "#")) + needs_link_to = root # next = Node(4, 6) # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) # root.add_child(Node(1, 6)) active = Point(root) - for i in range(1, n): - active, remainder, last_j = do_phase(root, active, i, last_j, remainder) + for i in range(1, 9): + active, remainder, last_j = do_phase(root, active, i, last_j, remainder, needs_link_to) + + place = Point(root, "a", 1) + # print(char_is_after(place, "b")) + # Node.global_end = len(string) # root.print_tree() # place = Point(next, "", 0) @@ -215,8 +241,7 @@ def ukkonen(string): # place = skip_count(3, place) # print(place, place.index_here(), place.char_here()) # split_edge(place) - root.print_tree() -ukkonen("abcdefghijklmnop$") +ukkonen("abacabad$") |
