diff options
Diffstat (limited to 'ass3')
| -rw-r--r-- | ass3/q2/header.py | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/ass3/q2/header.py b/ass3/q2/header.py new file mode 100644 index 0000000..8d98fb4 --- /dev/null +++ b/ass3/q2/header.py @@ -0,0 +1,106 @@ +import heapq + + +class HuffNode: + def __init__(self, string, count): + self.string = string + self.count = count + self.left = None + self.right = None + self.binary = None + self.height = 0 + + def __add__(self, other): + assert isinstance(other, HuffNode) + new_node = HuffNode(self.string + other.string, self.count + other.count) + new_node.left = self + new_node.right = other + new_node.height += 1 + max(self.height, other.height) + return new_node + + def __lt__(self, other): + if self.count == other.count: + return self.height < other.height + return self.count < other.count + + def __repr__(self): + return f"|'{self.string}', {self.count}|" + + def num_children(self): + return (self.left is not None) + (self.right is not None) + + def dfs_apply(self, func): + func(self) + if self.left is not None: + self.left.dfs_apply(func) + if self.right is not None: + self.right.dfs_apply(func) + + +def count_unique(string): + count = {} + for char in string: + if char in count: + count[char] += 1 + else: + count[char] = 1 + return count + + +def huffman(string): + count = count_unique(string) + alphabet = "".join(count.keys()) + nodes = [HuffNode(letter, count[letter]) for letter in alphabet] + leaves = nodes[:] + heapq.heapify(nodes) + while len(nodes) > 1: + first, second = heapq.heappop(nodes), heapq.heappop(nodes) + new_node = first + second + heapq.heappush(nodes, new_node) + root = nodes[0] + root.dfs_apply(print) + huffman_calculate(root) + result = {} + for leaf in leaves: + result[leaf.string] = leaf.binary + return result + + +def huffman_calculate(node: HuffNode, bin_prefix=""): + if node.num_children() == 0: + node.binary = bin_prefix + return + huffman_calculate(node.left, bin_prefix + "0") + huffman_calculate(node.right, bin_prefix + "1") + + +def elias(n): + assert n > 0 + binary = bin(n)[2:] + result = binary + while len(binary) != 1: + binary = "0" + bin(len(binary)-1)[3:] + result = binary + result + return result + +def header(string): + huff_code = huffman(string) + alphabet_size = len(huff_code.keys()) + output = f"{elias(alphabet_size)}" + output2 = f"{alphabet_size} -- " + for char in huff_code: + output += bin(ord(char))[2:] + output += elias(len(huff_code[char])) + output += huff_code[char] + + output2 += "(" + output2 += str(ord(char)) + output2 += ": " + output2 += str(len(huff_code[char])) + output2 += " -> '" + output2 += huff_code[char] + output2 += "'), " + return output, output2 + + +print(header("aacaacabcaba")) |
