From 16f55f173b98796f1f548a9505bbd25c05d65f98 Mon Sep 17 00:00:00 2001 From: akiyamn Date: Fri, 23 Apr 2021 21:28:41 +1000 Subject: Ass 2: Rule 2 maybe working? --- .gitignore | 2 ++ ass2/q2/new_suffix_array.py | 51 ++++++++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 40562a6..e1f7e8d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ +output_kruskals.txt .idea + # ---> Python # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/ass2/q2/new_suffix_array.py b/ass2/q2/new_suffix_array.py index 259b1ef..0bc5bd5 100644 --- a/ass2/q2/new_suffix_array.py +++ b/ass2/q2/new_suffix_array.py @@ -122,16 +122,28 @@ def split_edge(split_point: Point): split_point.node.add_child(mediator) return mediator -def do_phase(root: Node, active: Point, i, last_j): +def do_phase(root: Node, active: Point, i, last_j, remainder): root_point = Point(root) Node.global_end += 1 did_rule_three = False j = last_j + 1 - while not did_rule_three: + while not did_rule_three and j <= i+1: curr_char = Node.string[i] - new_point = skip_count(i + 1, root_point) - if curr_char == active.char_here(): - pass + match = char_is_after(active, curr_char) + if match: + active = skip_count(1, active) + remainder += 1 + did_rule_three = True + else: + if not active.is_explicit(): + mediator = split_edge(active) + mediator.add_child(Node(i, "#")) + else: + active.node.add_child(Node(i, "#")) + remainder -= 1 + last_j = i + 1 + j += 1 + return active, remainder, last_j @@ -159,11 +171,13 @@ def skip_count(num_chars, start_point: Point): head.set_node(head.edge_node) chars_left -= incoming_length - assert head.node.end_index + 1 + chars_left < len(Node.string) + # assert head.node.end_index + 1 + chars_left < len(Node.string) while chars_left > incoming_length: - assert head.node.end_index + 1 + chars_left < len(Node.string) + # assert head.node.end_index + 1 + chars_left < len(Node.string) direction = Node.string[0] if head.node.root else Node.string[head.node.end_index + 1] next_node = head.node.get_child(direction) + if next_node is None: + return None incoming_length = next_node.edge_length if chars_left <= incoming_length: break @@ -185,18 +199,21 @@ def ukkonen(string): remainder = 0 last_j = 1 root = create_root() - next = Node(4, 6) - root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) - root.add_child(Node(1, 6)) + root.add_child(Node(0, "#")) + # next = Node(4, 6) + # root.add_child(Node(0, 3)).add_child(next).add_child(Node(7, 7)).add_child(Node(8, "#")) + # root.add_child(Node(1, 6)) active = Point(root) - Node.global_end = len(string) - root.print_tree() - place = Point(next, "", 0) + for i in range(1, n): + active, remainder, last_j = do_phase(root, active, i, last_j, remainder) + # Node.global_end = len(string) + # root.print_tree() + # place = Point(next, "", 0) + # print(place, place.index_here(), place.char_here()) + # place = skip_count(3, place) + # place = skip_count(4, place) + # place = skip_count(3, place) # print(place, place.index_here(), place.char_here()) - place = skip_count(3, place) - place = skip_count(4, place) - place = skip_count(3, place) - print(place, place.index_here(), place.char_here()) # split_edge(place) root.print_tree() -- cgit v1.2.3