1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
import heapq
import sys
class HuffNode:
def __init__(self, string, count):
self.string = string
self.count = count
self.left = None
self.right = None
self.binary = None
self.height = 0
def __add__(self, other):
assert isinstance(other, HuffNode)
new_node = HuffNode(self.string + other.string, self.count + other.count)
new_node.left = self
new_node.right = other
new_node.height += 1 + max(self.height, other.height)
return new_node
def __lt__(self, other):
if self.count == other.count:
return self.height < other.height
return self.count < other.count
def __repr__(self):
return f"|'{self.string}', {self.count}|"
def num_children(self):
return (self.left is not None) + (self.right is not None)
def dfs_apply(self, func):
func(self)
if self.left is not None:
self.left.dfs_apply(func)
if self.right is not None:
self.right.dfs_apply(func)
def count_unique(string):
count = {}
for char in string:
if char in count:
count[char] += 1
else:
count[char] = 1
return count
def huffman(string):
count = count_unique(string)
alphabet = "".join(count.keys())
nodes = [HuffNode(letter, count[letter]) for letter in alphabet]
leaves = nodes[:]
heapq.heapify(nodes)
while len(nodes) > 1:
first, second = heapq.heappop(nodes), heapq.heappop(nodes)
new_node = first + second
heapq.heappush(nodes, new_node)
root = nodes[0]
huffman_calculate(root)
result = {}
for leaf in leaves:
result[leaf.string] = leaf.binary
return result
def huffman_calculate(node: HuffNode, bin_prefix=""):
if node.num_children() == 0:
node.binary = bin_prefix
return
huffman_calculate(node.left, bin_prefix + "0")
huffman_calculate(node.right, bin_prefix + "1")
def elias(n):
assert n > 0
binary = bin(n)[2:]
result = binary
while len(binary) != 1:
binary = "0" + bin(len(binary) - 1)[3:]
result = binary + result
return result
def header(string):
huff_code = huffman(string)
alphabet_size = len(huff_code.keys())
output = f"{elias(alphabet_size)}"
for char in huff_code:
output += bin(ord(char))[2:]
output += elias(len(huff_code[char]))
output += huff_code[char]
return output
def read_file(filename):
with open(filename, "r") as file:
contents = file.read()
return contents
def write_file(filename, contents):
with open(filename, "w") as file:
file.write(contents)
def main():
assert len(sys.argv) >= 2
string = read_file(sys.argv[1])
write_file("output_header.txt", header(string))
if __name__ == "__main__":
main()
|