"""
Alexander Occhipinti - 29994705
This file is imported into questions 2 and 3.
"""

import sys

ALPHABET_SIZE = 28


class OrderedDict(dict):
    """
    A hybrid Python dictionary/list
        All set/get item operations on this data structure are the same complexity of a normal dictionary O(1)-ish
        For Ukkonen's operation, only the normal dictionary features are used.
    As values are stored in the dictionary, the are also referenced in a list of size O(alphabet).
        This acts as a kind of 'counting sort' when accessed is O(n), but provides a pre-sorted list of all children nodes
        This is used for generating the suffix array.
    """
    def __init__(self):
        super().__init__()
        self.first_letters = [None for _ in range(ALPHABET_SIZE)]

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        try:
            self.first_letters[self.rank(key)] = value
        except IndexError:
            raise IndexError(f"Could not add item of key '{key}' since it is out of range of the rank function.")

    def __delitem__(self, key):
        super().__delitem__(key)
        self.first_letters[self.rank(key)] = None

    def ordered_items(self):  # Return iterable of pre-sorted items (for suffix array)
        return filter(lambda x: x is not None, self.first_letters)

    @staticmethod
    def rank(char):
        """
        Define a number value to an alphabet letter, including special characters so they can fit in a list
        """
        if char == "$":  # Designates end of string
            return 0
        elif char == "&":  # Designates break in two strings (Used for Q3)
            return 1
        else:
            return ord(char) - 95  # Letters a-z


class Node:
    """
    Represents an arbitrary node in a suffix tree
    Also statically sotres some state information about the algorithm (not pretty, I know)
    """
    global_end = 0
    num_splits = 0
    all_nodes = []
    string = ""

    def __init__(self, start, end):
        self.root = False
        self.start = start
        self.end = end
        self.children = OrderedDict()
        self.id = len(self.all_nodes)
        self.suffix_index = self.id - self.num_splits - 1
        self.all_nodes.append(self)
        self.parent = None
        self.link = None

    def __str__(self):
        """
        String representation of node, shows important internal values of a node (for debug)
        """
        link_str = "" if self.link is None else f" -> {self.link.id}"
        if not self.root:
            j, i = self.tuple()
            return f"[{self.id}, {self.tuple()}, {self.string[j:i + 1]}{link_str}]"
        return f"[{self.id} root{link_str}]"

    def __repr__(self):  # Shorter representation of node
        return f"[{self.id}]"

    def print_tree(self, spaces=1):
        """
        Recursively prints tree of nodes (for debug)
        """
        print(f"{self}")
        for edge in self.children:
            print(f"   " * spaces, end="")
            self.get_child(edge).print_tree(spaces=spaces + 1)

    def first_char(self):
        return self.string[self.start]

    def get_child(self, char):
        if char in self.children:
            return self.children[char]
        return None

    def add_child(self, child):
        child.parent = self
        self.children[child.first_char()] = child
        return child

    def remove_child(self, child):
        self.children.pop(child.first_char())

    @property
    def end_index(self):  # Translates end index into a number (could be '#' pointer)
        return self.tuple()[1]

    def tuple(self):
        """
        Returns the resolved start and end coordinates of the substring this node represents
        """
        if self.root:
            raise Exception("Can't get substring of root.")
        if self.end == "#":  # Translate '#' into global_end
            return self.start, self.global_end
        return self.start, self.end

    @property
    def edge_length(self):
        if self.root:
            return 0
        else:
            start, end = self.tuple()
            return end - start + 1

    def detach(self):
        self.parent.remove_child(self)
        self.parent = None


class Point:
    """
    A representation of a single point on the tree. Used to store active node, edge and length data in one place
    Could represent a place in the middle of an edge (implicit) or a place on a node (explicit).
    Abstracts away a lot of tedium regarding working with these closely connected values.
    Can be used to create 'pure' functions which return a transformation on a given point
    """
    def __init__(self, node, edge="", length=0):
        assert isinstance(node, Node)
        self.node = node
        self.edge = edge
        self.length = length

    def __repr__(self):
        return f"(Node {self.node.id}'s edge:'{self.edge}', {self.length} along.)"

    def is_explicit(self):  # a.k.a. is not on an edge
        return self.edge == ""

    def set_node(self, node):  # Set point to a specific node, reset other values
        self.node = node
        self.edge = ""
        self.length = 0

    @property
    def edge_node(self) -> Node:  # Return the Node object of the edge this object points to
        return self.node.get_child(self.edge)

    def index_here(self):
        """
        Return the index in the original string that this point refers to
        """
        if self.is_explicit():
            return 0 if self.node.root else self.node.start
        return self.edge_node.start + self.length - 1

    def char_here(self):
        """
        Return the char in the original string that this point refers to
        """
        return Node.string[self.index_here()]


def create_root():
    """
    Create a root node with special root properties. Used to initalise the algorithm
    """
    assert len(Node.all_nodes) == 0
    root = Node(None, None)
    root.root = True
    root.link = root
    return root


def split_edge(split_point: Point):
    """
    Split a given edge into two separate edges, creating a new node in the middle (called a mediator in my code)
    Used for Rule 2s on implicit suffixes.
    Returns the newly created mediator node
    """
    assert not split_point.is_explicit()
    edge = split_point.edge_node
    original = edge.tuple()
    edge.detach()
    Node.num_splits += 1
    mediator = Node(original[0], original[0] + split_point.length - 1)
    mediator.suffix_index = None
    edge.start = original[0] + split_point.length
    assert edge.start <= edge.end_index
    mediator.add_child(edge)
    split_point.node.add_child(mediator)
    return mediator


def pos(n: int):
    return max(n, 0)


def do_phase(root: Node, active: Point, i, last_j, remainder):
    """
    Performs a single phase of Ukkonen's algorithm, returning values used for the next phase.
    """

    # Initialisation
    root_point = Point(root)
    Node.global_end += 1  # Perform rapid leaf extension trick (Rule 1)
    did_rule_three = False
    j = last_j + 1
    node_just_created = None

    while not did_rule_three and j <= i + 1:  # Run only the required extensions for this phase

        curr_char = Node.string[i]
        match = char_is_after(active, curr_char)
        if match:  # Decide if Rule 2 or 3.
            # RULE 3 LOGIC
            remainder += 1
            if node_just_created is not None:
                node_just_created.link = active.node  # Create suffix link (Rule 3)
            active = skip_count(1, active, i)  # Move active node
            did_rule_three = True  # Break loop
        else:
            # RULE 2 LOGIC
            if not active.is_explicit():  # Active point on an edge, need to split
                mediator = split_edge(active)
                mediator.add_child(Node(i, "#"))  # Dangle new character off of mediator node from split
                if node_just_created is not None:
                    node_just_created.link = mediator  # Create suffix link (First sub-case)
                node_just_created = mediator
                active.length -= 1
                if active.length == 0:
                    active.set_node(active.node)
            else:  # Active point on node, just dangle off a new node
                active.node.add_child(Node(i, "#"))
                if node_just_created is not None and node_just_created.link is None:
                    node_just_created.link = active.node  # Create suffix link (Second sub-case)
            remainder = pos(remainder - 1)
            active.set_node(active.node.link)  # Go to suffix link
            if remainder > 0:
                active = skip_count(remainder, Point(root), i - remainder)  # Traverse from root
            last_j = j
            j += 1
        # print(active)
        # root.print_tree()
    return active, remainder, last_j


def char_is_after(point: Point, char):
    """
    Return if a given character is traversable directly after a given point
    Used for Rule 2/3 selection
    """
    if point.is_explicit():  # If point on a node
        return char in point.node.children
    else:  # If point on an edge
        if point.length == point.edge_node.edge_length:
            return Node.string[point.edge_node.start] == char
        else:  # If not at the end of an edge
            return Node.string[point.index_here() + 1] == char


def skip_count(num_chars, start_point: Point, index):
    """
    Use the skip-counting trick to traverse num_chars down from point start_point.
    Use index value as where to start looking in the string for char comparison
    Returns the point that the traversal lands on
    """

    # Initialise
    incoming_length = -1
    existing_length = 0
    head = start_point
    chars_left = num_chars
    char = ""

    # Move point to nearest node if it is on an edge
    if not head.is_explicit():
        incoming_length = head.edge_node.edge_length - head.length
        if num_chars < incoming_length:
            head.length += num_chars
            return head
        head.set_node(head.edge_node)
        chars_left -= incoming_length
        index += incoming_length

    # Main traversal loop
    while chars_left > 0:
        direction = Node.string[index]  # Choose a direction to go from this point
        next_node = head.node.get_child(direction)
        if next_node is None:  # Went off the tree -> error
            raise IndexError(f"Attempted to traverse char\n '{direction}' at point {head}. ({index=})")
        incoming_length = next_node.edge_length
        if chars_left < incoming_length:  # Break if we able can't go down that edge
            break
        # Move down edge to next node
        chars_left -= incoming_length
        index += incoming_length
        head.set_node(next_node)

    # Return position on edge if couldn't traverse a final edge (search landed on edge)
    if chars_left > 0:
        head.edge = Node.string[index]
        head.length = chars_left

    return head


def ukkonen(string):
    """
    Reset the algorithm values and create return the root of a suffix tree for a given string
    using everyone's favourite algorithm: Ukkonen's algorithm. O(n) time.
    """
    # Initialise values
    string += "$"
    Node.string = string
    Node.global_end = 0
    Node.num_splits = 0
    Node.all_nodes.clear()
    n = len(string)
    remainder = 0
    last_j = 1
    # Perform base case i = 0 phase
    root = create_root()
    root.add_child(Node(0, "#"))
    active = Point(root)
    # Perform rest of phases
    for i in range(1, n):
        active, remainder, last_j = do_phase(root, active, i, last_j, remainder)
    return root


if __name__ == "__main__":
    ukkonen("abacabad")
    print("done")