aboutsummaryrefslogtreecommitdiff
path: root/ass2/q3/lcp.py
blob: 6c7277123769983e625426876bfe2069b9110c3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
from ass2.ukkonen import ukkonen, Point, Node


def longest_prefix(root: Node, s1: str, s2: str, offset: tuple):
    """
    Finds the longest prefix of a suffix of two different strings, provided a suffix tree of "s1&s2"
    The offset tuple represents the start index of each suffix to compare to.

    Starting from the root, if the two strings have a matching character, move in that direction of the suffix tree
    Increment the index to compare the next characters by, by the length of the edge just traveled
    Stop when both strings 'disagree' on which path to take.
    This effectively finds the deepest common ancestor of two suffixes on the same tree.
    """
    index = 0
    head = Point(root)
    shortest = min(len(s1) - offset[0], len(s2) - offset[1])
    while index < shortest:  # Stop searching when you hit the end of the shortest string
        if s1[index + offset[0]] == s2[index + offset[1]]:
            going_to = head.node.get_child(s1[index + offset[0]])
            head.set_node(going_to)  # Move to new chosen node
            index += going_to.edge_length
        else:
            return index
    return index


def read_in_string(filename):
    """
    Reads in a string from a given file name
    """
    with open(filename, "r") as file:
        return file.read()


def create_suffix_list(filename):
    """
    From the suffix index file, generate a list of tuples representing each suffix pair to search
    """
    tuples = []
    for line in read_in_string(filename).split("\n"):
        parts = line.split(" ")
        tuples.append((int(parts[0]), int(parts[1])))
    return tuples


def write_longest_prefixes(root: Node, s1: str, s2: str, suffixes):
    """
    Write the longest common suffixes of two strings provided a list of suffix index pairs to file
    """
    results = map(lambda t: f"{t[0]} {t[1]} {longest_prefix(root, s1, s2, t)}", suffixes)
    with open("output_lcp.txt", "w") as file:
        file.write("\n".join(results))


def main():
    assert len(sys.argv) == 4  # Need correct number of arguments
    s1 = read_in_string(sys.argv[1])
    s2 = read_in_string(sys.argv[2])
    suffixes = create_suffix_list(sys.argv[3])
    combined = s1 + "&" + s2
    tree = ukkonen(combined)  # Generate the suffix tree for both strings once
    write_longest_prefixes(tree, s1, s2, suffixes) # Write answers to file


if __name__ == "__main__":
    main()