aboutsummaryrefslogtreecommitdiff
path: root/ass3/q2/header.py
diff options
context:
space:
mode:
authorakiyamn2021-05-15 16:56:14 +1000
committerakiyamn2021-05-15 16:56:14 +1000
commit0e9a7210b759142c27c92068db81a85d0f08f4dc (patch)
tree5828528432fd8671cec139e0e3805d4ba7da2142 /ass3/q2/header.py
parent22d7bc2dfcc974aedc91afebd27797fc00a9ecb1 (diff)
downloadfit3155-0e9a7210b759142c27c92068db81a85d0f08f4dc.tar.gz
fit3155-0e9a7210b759142c27c92068db81a85d0f08f4dc.zip
Ass 3: q2 internals done
Diffstat (limited to 'ass3/q2/header.py')
-rw-r--r--ass3/q2/header.py106
1 files changed, 106 insertions, 0 deletions
diff --git a/ass3/q2/header.py b/ass3/q2/header.py
new file mode 100644
index 0000000..8d98fb4
--- /dev/null
+++ b/ass3/q2/header.py
@@ -0,0 +1,106 @@
+import heapq
+
+
+class HuffNode:
+ def __init__(self, string, count):
+ self.string = string
+ self.count = count
+ self.left = None
+ self.right = None
+ self.binary = None
+ self.height = 0
+
+ def __add__(self, other):
+ assert isinstance(other, HuffNode)
+ new_node = HuffNode(self.string + other.string, self.count + other.count)
+ new_node.left = self
+ new_node.right = other
+ new_node.height += 1 + max(self.height, other.height)
+ return new_node
+
+ def __lt__(self, other):
+ if self.count == other.count:
+ return self.height < other.height
+ return self.count < other.count
+
+ def __repr__(self):
+ return f"|'{self.string}', {self.count}|"
+
+ def num_children(self):
+ return (self.left is not None) + (self.right is not None)
+
+ def dfs_apply(self, func):
+ func(self)
+ if self.left is not None:
+ self.left.dfs_apply(func)
+ if self.right is not None:
+ self.right.dfs_apply(func)
+
+
+def count_unique(string):
+ count = {}
+ for char in string:
+ if char in count:
+ count[char] += 1
+ else:
+ count[char] = 1
+ return count
+
+
+def huffman(string):
+ count = count_unique(string)
+ alphabet = "".join(count.keys())
+ nodes = [HuffNode(letter, count[letter]) for letter in alphabet]
+ leaves = nodes[:]
+ heapq.heapify(nodes)
+ while len(nodes) > 1:
+ first, second = heapq.heappop(nodes), heapq.heappop(nodes)
+ new_node = first + second
+ heapq.heappush(nodes, new_node)
+ root = nodes[0]
+ root.dfs_apply(print)
+ huffman_calculate(root)
+ result = {}
+ for leaf in leaves:
+ result[leaf.string] = leaf.binary
+ return result
+
+
+def huffman_calculate(node: HuffNode, bin_prefix=""):
+ if node.num_children() == 0:
+ node.binary = bin_prefix
+ return
+ huffman_calculate(node.left, bin_prefix + "0")
+ huffman_calculate(node.right, bin_prefix + "1")
+
+
+def elias(n):
+ assert n > 0
+ binary = bin(n)[2:]
+ result = binary
+ while len(binary) != 1:
+ binary = "0" + bin(len(binary)-1)[3:]
+ result = binary + result
+ return result
+
+def header(string):
+ huff_code = huffman(string)
+ alphabet_size = len(huff_code.keys())
+ output = f"{elias(alphabet_size)}"
+ output2 = f"{alphabet_size} -- "
+ for char in huff_code:
+ output += bin(ord(char))[2:]
+ output += elias(len(huff_code[char]))
+ output += huff_code[char]
+
+ output2 += "("
+ output2 += str(ord(char))
+ output2 += ": "
+ output2 += str(len(huff_code[char]))
+ output2 += " -> '"
+ output2 += huff_code[char]
+ output2 += "'), "
+ return output, output2
+
+
+print(header("aacaacabcaba"))