diff options
Diffstat (limited to 'ass3')
| -rw-r--r-- | ass3/q2/header.py | 106 | 
1 files changed, 106 insertions, 0 deletions
| diff --git a/ass3/q2/header.py b/ass3/q2/header.py new file mode 100644 index 0000000..8d98fb4 --- /dev/null +++ b/ass3/q2/header.py @@ -0,0 +1,106 @@ +import heapq + + +class HuffNode: +    def __init__(self, string, count): +        self.string = string +        self.count = count +        self.left = None +        self.right = None +        self.binary = None +        self.height = 0 + +    def __add__(self, other): +        assert isinstance(other, HuffNode) +        new_node = HuffNode(self.string + other.string, self.count + other.count) +        new_node.left = self +        new_node.right = other +        new_node.height += 1 + max(self.height, other.height) +        return new_node + +    def __lt__(self, other): +        if self.count == other.count: +            return self.height < other.height +        return self.count < other.count + +    def __repr__(self): +        return f"|'{self.string}', {self.count}|" + +    def num_children(self): +        return (self.left is not None) + (self.right is not None) + +    def dfs_apply(self, func): +        func(self) +        if self.left is not None: +            self.left.dfs_apply(func) +        if self.right is not None: +            self.right.dfs_apply(func) + + +def count_unique(string): +    count = {} +    for char in string: +        if char in count: +            count[char] += 1 +        else: +            count[char] = 1 +    return count + + +def huffman(string): +    count = count_unique(string) +    alphabet = "".join(count.keys()) +    nodes = [HuffNode(letter, count[letter]) for letter in alphabet] +    leaves = nodes[:] +    heapq.heapify(nodes) +    while len(nodes) > 1: +        first, second = heapq.heappop(nodes), heapq.heappop(nodes) +        new_node = first + second +        heapq.heappush(nodes, new_node) +    root = nodes[0] +    root.dfs_apply(print) +    huffman_calculate(root) +    result = {} +    for leaf in leaves: +        result[leaf.string] = leaf.binary +    return result + + +def huffman_calculate(node: HuffNode, bin_prefix=""): +    if node.num_children() == 0: +        node.binary = bin_prefix +        return +    huffman_calculate(node.left, bin_prefix + "0") +    huffman_calculate(node.right, bin_prefix + "1") + + +def elias(n): +    assert n > 0 +    binary = bin(n)[2:] +    result = binary +    while len(binary) != 1: +        binary = "0" + bin(len(binary)-1)[3:] +        result = binary + result +    return result + +def header(string): +    huff_code = huffman(string) +    alphabet_size = len(huff_code.keys()) +    output = f"{elias(alphabet_size)}" +    output2 = f"{alphabet_size} -- " +    for char in huff_code: +        output += bin(ord(char))[2:] +        output += elias(len(huff_code[char])) +        output += huff_code[char] + +        output2 += "(" +        output2 += str(ord(char)) +        output2 += ": " +        output2 += str(len(huff_code[char])) +        output2 += " -> '" +        output2 += huff_code[char] +        output2 += "'), " +    return output, output2 + + +print(header("aacaacabcaba")) | 
