import sys from ass2.ukkonen import ukkonen, Point, Node def longest_prefix(root: Node, s1: str, s2: str, offset: tuple): """ Finds the longest prefix of a suffix of two different strings, provided a suffix tree of "s1&s2" The offset tuple represents the start index of each suffix to compare to. Starting from the root, if the two strings have a matching character, move in that direction of the suffix tree Increment the index to compare the next characters by, by the length of the edge just traveled Stop when both strings 'disagree' on which path to take. This effectively finds the deepest common ancestor of two suffixes on the same tree. """ index = 0 head = Point(root) shortest = min(len(s1) - offset[0], len(s2) - offset[1]) while index < shortest: # Stop searching when you hit the end of the shortest string if s1[index + offset[0]] == s2[index + offset[1]]: going_to = head.node.get_child(s1[index + offset[0]]) head.set_node(going_to) # Move to new chosen node index += going_to.edge_length else: return index return index def read_in_string(filename): """ Reads in a string from a given file name """ with open(filename, "r") as file: return file.read() def create_suffix_list(filename): """ From the suffix index file, generate a list of tuples representing each suffix pair to search """ tuples = [] for line in read_in_string(filename).split("\n"): parts = line.split(" ") tuples.append((int(parts[0]), int(parts[1]))) return tuples def write_longest_prefixes(root: Node, s1: str, s2: str, suffixes): """ Write the longest common suffixes of two strings provided a list of suffix index pairs to file """ results = map(lambda t: f"{t[0]} {t[1]} {longest_prefix(root, s1, s2, t)}", suffixes) with open("output_lcp.txt", "w") as file: file.write("\n".join(results)) def main(): assert len(sys.argv) == 4 # Need correct number of arguments s1 = read_in_string(sys.argv[1]) s2 = read_in_string(sys.argv[2]) suffixes = create_suffix_list(sys.argv[3]) combined = s1 + "&" + s2 tree = ukkonen(combined) # Generate the suffix tree for both strings once write_longest_prefixes(tree, s1, s2, suffixes) # Write answers to file if __name__ == "__main__": main()