From f5c908a51d2b906aa216aece440e74376037b4d0 Mon Sep 17 00:00:00 2001 From: akiyamn Date: Mon, 19 Apr 2021 17:24:21 +1000 Subject: Ass2 Q1 is basically done --- ass2/q1/edges.txt | 12 ++++++ ass2/q1/kruskals.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ ass2/q2/suffix_array.py | 83 ++++++++++++++++++++++++++++++------------ 3 files changed, 169 insertions(+), 23 deletions(-) create mode 100644 ass2/q1/edges.txt create mode 100644 ass2/q1/kruskals.py diff --git a/ass2/q1/edges.txt b/ass2/q1/edges.txt new file mode 100644 index 0000000..9057793 --- /dev/null +++ b/ass2/q1/edges.txt @@ -0,0 +1,12 @@ +0 1 5 +1 2 4 +2 3 2 +3 4 10 +4 5 6 +5 0 1 +0 6 8 +1 6 2 +2 6 1 +3 6 3 +4 6 7 +5 6 4 \ No newline at end of file diff --git a/ass2/q1/kruskals.py b/ass2/q1/kruskals.py new file mode 100644 index 0000000..4b8da4d --- /dev/null +++ b/ass2/q1/kruskals.py @@ -0,0 +1,97 @@ +from functools import reduce + + +class Edge: + def __init__(self, start, end, weight): + self.start = start + self.end = end + self.weight = weight + + def __str__(self): + return f"{self.start} -({self.weight})-> {self.end}" + + def __repr__(self): + return str(self) + + def __int__(self): + return self.weight + +def create_edge_from_line(line): + data = map(int, line.split(" ")) + return Edge(*data) + + +def read_edges(filename): + edges = [] + with open(filename, "r") as file: + lines = file.read().split("\n") + for line in lines: + edges.append(create_edge_from_line(line)) + return edges + + +def count_vertices(edges): + v = -1 + for edge in edges: + v = max(v, edge.start, edge.end) + return v + 1 + + +def sort_edges(edges): + return sorted(edges, key=lambda e: e.weight) # Timsort, O(e log e) + + +def find(groups, vertex): + # return vertex if groups[vertex] < 0 else find(groups, groups[vertex]) + if groups[vertex] < 0: + return vertex + else: + return find(groups, groups[vertex]) + + +def union(groups, u, v): + root_u = find(groups, u) + root_v = find(groups, v) + if root_u == root_v: + return + height_u = -groups[root_u] + height_v = -groups[root_v] + if height_u > height_v: + groups[root_v] = root_u + elif height_v > height_u: + groups[root_u] = root_v + else: + groups[root_u] = root_v + groups[root_v] = -(height_v+1) + + +def is_spanning(min_span_tree, v): + exist = [False for _ in range(v)] + for edge in min_span_tree: + exist[edge.start] = True + exist[edge.end] = True + return all(exist) + + +def main(): + + # print(sum(map(int, [2, Edge(0, 1, 5)]))) + + edges = read_edges("edges.txt") + print(edges) + print(sort_edges(edges)) + v = count_vertices(edges) + groups = [-1 for _ in range(v)] + min_span_tree = [] + for edge in sort_edges(edges): + if find(groups, edge.start) != find(groups, edge.end): + union(groups, edge.start, edge.end) + min_span_tree.append(edge) + + weight_total = sum(map(int, min_span_tree)) + print(weight_total) + print("\n".join(map(str, min_span_tree))) + assert is_spanning(min_span_tree, v) + + +main() diff --git a/ass2/q2/suffix_array.py b/ass2/q2/suffix_array.py index 1bb88b9..db22c3e 100644 --- a/ass2/q2/suffix_array.py +++ b/ass2/q2/suffix_array.py @@ -9,13 +9,15 @@ class Node: self.id = len(self.all) self.all.append(self) self.parent = None + self.link = None def __str__(self): tup = self.get_tuple() + link_str = "" if self.link is None else f" -> {self.link.id}" if tup is not None: j, i = tup - return f"[{self.id}, {self.data}, {self.string[j:i + 1]}]" - return f"[{self.id} root]" + return f"[{self.id}, {self.data}, {self.string[j:i + 1]}{link_str}]" + return f"[{self.id} root{link_str}]" def __repr__(self): return f"[{self.id}]" @@ -41,7 +43,17 @@ class Node: def bastardise(self): self.parent.remove_child(self) - parent = None + self.parent = None + + def add_suffix_link(self, destination): + assert self.link is None + assert len(self.children) > 0 + assert isinstance(destination, Node) + self.link = destination + + def follow_link(self): + assert self.link is not None + return self.link def get_tuple(self): if self.data is not None and self.data[1] == "#": @@ -71,7 +83,7 @@ class Node: child.print_tree(spaces=spaces + 1) -def branch_from_point(active_node, active_edge, active_length, j): +def branch_from_point(active_node, active_edge, active_length, j, need_link_to): edge = active_node.get_child(active_edge) edge.bastardise() original_substr = edge.get_tuple() @@ -79,7 +91,9 @@ def branch_from_point(active_node, active_edge, active_length, j): mediator = Node((original_substr[0], original_substr[0] + active_length - 1)) active_node.add_child(mediator) mediator.add_child(edge) - mediator.add_child(Node((j + 1, '#'))) + mediator.add_child(Node((j+1, '#'))) + # assert j+1 != 12 + return mediator def traverse_down(string, substring, active_node, active_edge, active_length, remainder): @@ -89,18 +103,25 @@ def traverse_down(string, substring, active_node, active_edge, active_length, re char = "" remainder -= 0 if active_node.data is None else active_node.data[0] + 1 while remainder > 0: + char = string[i + traversed] - path = active_node.get_child(char) - assert path is not None + path = None + if active_length > 0: + path = active_node.get_child(active_edge) + else: + path = active_node.get_child(char) + if path is None: + raise Exception(f"Tried to go down '{char}' from point: {active_node, active_edge, active_length}") traversed += path.edge_length() g = min(remainder, path.edge_length()) # if path is None: # return active_node, active_edge, active_length - if remainder >= path.edge_length(): + if remainder >= path.edge_length() - active_length: active_node = path + print(active_node.link is None) active_length = 0 active_edge = '' - remainder -= path.edge_length() + remainder -= path.edge_length() - active_length else: active_length = remainder active_edge = char if active_length != 0 else None @@ -113,10 +134,9 @@ def char_match_after(char, active_node, active_edge, active_length): if active_length < 1: return active_node.get_child(char) is not None edge = active_node.get_child(active_edge) - position = edge.data[0] + active_length + 1 + position = edge.data[0] + active_length return Node.string[position] == char - def suffix_tree(string): # string += "$" Node.string = string @@ -124,39 +144,56 @@ def suffix_tree(string): last_j = 0 root = Node() root.add_child(Node((0, '#'))) + root.add_suffix_link(root) + need_link_to = root active_node = root active_edge = "" active_length = 0 remainder = 0 - last_rule = 0 for i in range(1, n): + + # if active_node.link is not None: + # active_node = active_node.link + # if active_node == active_node.link: + # remainder -= 1 Node.global_end += 1 # Rule 1s implicitly for j in range(last_j + 1, i + 1): - print(string[j:i + 1], end=": ") - # if 2 or more letter string, just match the last character at the current position (we already went there) i.e. i i think - char = string[i] if i-j <= 1 else string[j] + print(string[0:i + 1], end=": ") + # if 2 or more letter string, just match the last character at the current position (we already went + # there) i.e. i i think + char = string[i] if i - j <= 1 else string[i] match = char_match_after(char, active_node, active_edge, active_length) if match: # Rule 3 print(3, -j) remainder += 1 active_node, active_edge, active_length = traverse_down(string, (j, i), active_node, active_edge, active_length, remainder) - break else: # Rule 2 print(2, -j) - if active_length > 0: - branch_from_point(active_node, active_edge, active_length, j) - else: - active_node.add_child(Node((j, '#'))) + # new_node = None + if active_length > 0: # If the pointer is on an edge + # if active_node.link is not None: + # active_node = active_node.link + new_node = branch_from_point(active_node, active_edge, active_length, j, need_link_to) + new_node.add_suffix_link(need_link_to) + need_link_to = new_node + print(f"*** I want to go from {repr(active_node)} -> {repr(active_node.link)} ***") + # if active_node.link is not None: + # active_node = active_node.link + else: # If the pointer is on a node + new_node = Node((j, '#')) + active_node.add_child(new_node) + active_edge = "" remainder = max(remainder - 1, 0) active_length = 0 last_j = j - print(f"{i=} '{string[i]}' => {(active_node, active_edge, active_length)}, {remainder}") - root.print_tree() - print("\n" * 10) + print(f"{i=} '{string[i]}' => {(active_node, active_edge, active_length)}, {remainder}") + # print(f"{i=} '{string[i]}' => {(active_node, active_edge, active_length)}, {remainder}") + root.print_tree() + print("\n" * 10) suffix_tree("abacabad$") -- cgit v1.2.3