import heapq import sys class HuffNode: def __init__(self, string, count): self.string = string self.count = count self.left = None self.right = None self.binary = None self.height = 0 def __add__(self, other): assert isinstance(other, HuffNode) new_node = HuffNode(self.string + other.string, self.count + other.count) new_node.left = self new_node.right = other new_node.height += 1 + max(self.height, other.height) return new_node def __lt__(self, other): if self.count == other.count: return self.height < other.height return self.count < other.count def __repr__(self): return f"|'{self.string}', {self.count}|" def num_children(self): return (self.left is not None) + (self.right is not None) def dfs_apply(self, func): func(self) if self.left is not None: self.left.dfs_apply(func) if self.right is not None: self.right.dfs_apply(func) def count_unique(string): count = {} for char in string: if char in count: count[char] += 1 else: count[char] = 1 return count def huffman(string): count = count_unique(string) alphabet = "".join(count.keys()) nodes = [HuffNode(letter, count[letter]) for letter in alphabet] leaves = nodes[:] heapq.heapify(nodes) while len(nodes) > 1: first, second = heapq.heappop(nodes), heapq.heappop(nodes) new_node = first + second heapq.heappush(nodes, new_node) root = nodes[0] huffman_calculate(root) result = {} for leaf in leaves: result[leaf.string] = leaf.binary return result def huffman_calculate(node: HuffNode, bin_prefix=""): if node.num_children() == 0: node.binary = bin_prefix return huffman_calculate(node.left, bin_prefix + "0") huffman_calculate(node.right, bin_prefix + "1") def elias(n): assert n > 0 binary = bin(n)[2:] result = binary while len(binary) != 1: binary = "0" + bin(len(binary) - 1)[3:] result = binary + result return result def header(string): huff_code = huffman(string) alphabet_size = len(huff_code.keys()) output = f"{elias(alphabet_size)}" for char in huff_code: output += bin(ord(char))[2:] output += elias(len(huff_code[char])) output += huff_code[char] return output def read_file(filename): with open(filename, "r") as file: contents = file.read() return contents def write_file(filename, contents): with open(filename, "w") as file: file.write(contents) def main(): assert len(sys.argv) >= 2 string = read_file(sys.argv[1]) write_file("output_header.txt", header(string)) if __name__ == "__main__": main()