1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
import sys
from ass2.ukkonen import ukkonen, Point, Node
def longest_prefix(root: Node, s1: str, s2: str, offset: tuple):
"""
Finds the longest prefix of a suffix of two different strings, provided a suffix tree of "s1&s2"
The offset tuple represents the start index of each suffix to compare to.
Starting from the root, if the two strings have a matching character, move in that direction of the suffix tree
Increment the index to compare the next characters by, by the length of the edge just traveled
Stop when both strings 'disagree' on which path to take.
This effectively finds the deepest common ancestor of two suffixes on the same tree.
"""
index = 0
head = Point(root)
shortest = min(len(s1) - offset[0], len(s2) - offset[1])
while index < shortest: # Stop searching when you hit the end of the shortest string
if s1[index + offset[0]] == s2[index + offset[1]]:
going_to = head.node.get_child(s1[index + offset[0]])
head.set_node(going_to) # Move to new chosen node
index += going_to.edge_length
else:
return index
return index
def read_in_string(filename):
"""
Reads in a string from a given file name
"""
with open(filename, "r") as file:
return file.read()
def create_suffix_list(filename):
"""
From the suffix index file, generate a list of tuples representing each suffix pair to search
"""
tuples = []
for line in read_in_string(filename).split("\n"):
parts = line.split(" ")
tuples.append((int(parts[0]), int(parts[1])))
return tuples
def write_longest_prefixes(root: Node, s1: str, s2: str, suffixes):
"""
Write the longest common suffixes of two strings provided a list of suffix index pairs to file
"""
results = map(lambda t: f"{t[0]} {t[1]} {longest_prefix(root, s1, s2, t)}", suffixes)
with open("output_lcp.txt", "w") as file:
file.write("\n".join(results))
def main():
assert len(sys.argv) == 4 # Need correct number of arguments
s1 = read_in_string(sys.argv[1])
s2 = read_in_string(sys.argv[2])
suffixes = create_suffix_list(sys.argv[3])
combined = s1 + "&" + s2
tree = ukkonen(combined) # Generate the suffix tree for both strings once
write_longest_prefixes(tree, s1, s2, suffixes) # Write answers to file
if __name__ == "__main__":
main()
|