aboutsummaryrefslogtreecommitdiff
path: root/ass3/q2/header.py
blob: a10b9a9bd4ec8b34354c17a6593e70a4835021b5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import heapq
import sys


class HuffNode:
    def __init__(self, string, count):
        self.string = string
        self.count = count
        self.left = None
        self.right = None
        self.binary = None
        self.height = 0

    def __add__(self, other):
        assert isinstance(other, HuffNode)
        new_node = HuffNode(self.string + other.string, self.count + other.count)
        new_node.left = self
        new_node.right = other
        new_node.height += 1 + max(self.height, other.height)
        return new_node

    def __lt__(self, other):
        if self.count == other.count:
            return self.height < other.height
        return self.count < other.count

    def __repr__(self):
        return f"|'{self.string}', {self.count}|"

    def num_children(self):
        return (self.left is not None) + (self.right is not None)

    def dfs_apply(self, func):
        func(self)
        if self.left is not None:
            self.left.dfs_apply(func)
        if self.right is not None:
            self.right.dfs_apply(func)


def count_unique(string):
    count = {}
    for char in string:
        if char in count:
            count[char] += 1
        else:
            count[char] = 1
    return count


def huffman(string):
    count = count_unique(string)
    alphabet = "".join(count.keys())
    nodes = [HuffNode(letter, count[letter]) for letter in alphabet]
    leaves = nodes[:]
    heapq.heapify(nodes)
    while len(nodes) > 1:
        first, second = heapq.heappop(nodes), heapq.heappop(nodes)
        new_node = first + second
        heapq.heappush(nodes, new_node)
    root = nodes[0]
    huffman_calculate(root)
    result = {}
    for leaf in leaves:
        result[leaf.string] = leaf.binary
    return result


def huffman_calculate(node: HuffNode, bin_prefix=""):
    if node.num_children() == 0:
        node.binary = bin_prefix
        return
    huffman_calculate(node.left, bin_prefix + "0")
    huffman_calculate(node.right, bin_prefix + "1")


def elias(n):
    assert n > 0
    binary = bin(n)[2:]
    result = binary
    while len(binary) != 1:
        binary = "0" + bin(len(binary) - 1)[3:]
        result = binary + result
    return result


def header(string):
    huff_code = huffman(string)
    alphabet_size = len(huff_code.keys())
    output = f"{elias(alphabet_size)}"
    for char in huff_code:
        output += bin(ord(char))[2:]
        output += elias(len(huff_code[char]))
        output += huff_code[char]
    return output


def read_file(filename):
    with open(filename, "r") as file:
        contents = file.read()
    return contents


def write_file(filename, contents):
    with open(filename, "w") as file:
        file.write(contents)


def main():
    assert len(sys.argv) >= 2
    string = read_file(sys.argv[1])
    write_file("output_header.txt", header(string))

if __name__ == "__main__":
    main()