信息论中对于定长编码和哈夫曼编码效率的比较

这是期末信息论课程的一次实验内容,实验进行了定长和哈夫曼码的编码和译码,同时也比较了两种编码的编码效率,比较有意思。

实验内容:

掷骰子游戏,每次同时抛掷两枚骰子,将两枚骰子点数的和作为游戏的结果,重复抛掷 1000 次(视为 1000 次信源符号输出)。

要求:

  1. 对 1000 次游戏结果进行逐符号二进制定长编码和译码。
  2. 对 1000 次游戏结果进行逐符号二进制变长编码和译码(Huffman 编码)
  3. 比较上述两种编码的效率。

解决方案:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#Theory of Information extra section
#Author:Jason Bian

import random
import math

#set the max_range
max_range=1000

#get random numbers with two dices
dice_arr=[]
for i in range(0,max_range):
tempdice1=random.randint(1,6)
tempdice2=random.randint(1,6)
tempdicetotal=tempdice1+tempdice2
dice_arr.append(tempdicetotal)
print "dicrArray is ",dice_arr

#define fixed encoding code
fixedEncoding_arr={2:"0000",3:"0001",4:"0010",5:"0011",6:"0100",7:"0101",8:"0110",9:"0111",10:"1000",11:"1001",12:"1010"}

#use random numbers generate fixed code
fixedArray=[]
for i in range(len(dice_arr)):
fixedArray.append(fixedEncoding_arr[dice_arr[i]])
print "fixedArray is ",fixedArray
fixedCode="".join(fixedArray)
print "fixedCode is ",fixedCode

#collect char freqs
freqs_arr={2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0}
for i in range(len(dice_arr)):
freqs_arr[dice_arr[i]]+=1
print "fixed code freqsArray is ",freqs_arr

#calc the probability of each code
prob_arr=[]
for i in freqs_arr:
prob_arr.append(float(freqs_arr[i])/float(max_range))
print "code probability arr is ",prob_arr

#calc the information entropy
informationEntropy=0.0
for i in range(len(prob_arr)):
if prob_arr[i]!=0:
informationEntropy=informationEntropy-prob_arr[i]*math.log(prob_arr[i],2)
print "information entropy is ",informationEntropy

#print fixed code table
print "fixed Code Table:"
for i in freqs_arr:
print "Number:%s freq:%-2d encoding: %s"%(i,freqs_arr[i],fixedEncoding_arr[i])

#calc the fixed code efficiency
fixedEffic=(max_range*informationEntropy)/(4*max_range*math.log(2,2))
print "fixed code efficiency is ",fixedEffic

#decode the fixed code
lPos=0
rPos=4
fixedDecode=[]
for i in range(max_range):
tempCode=fixedCode[lPos:rPos]
for j in fixedEncoding_arr:
if fixedEncoding_arr[j]==tempCode:
fixedDecode.append(j)
lPos=lPos+4
rPos=rPos+4
print "the decode of fixed code is ",fixedDecode

#Huffman Tree-Node Type
class Node:
def __init__(self,freq):
self.left=None
self.right=None
self.father=None
self.freq=freq
def isLeft(self):
return self.father.left==self

#create Huffman leaf node
def createNodes(freqs):
return [Node(freq) for freq in freqs]

#create Huffman-Tree
def createHuffmanTree(nodes):
queue=nodes[:]
while len(queue)>1:
queue.sort(key=lambda item:item.freq)
node_left=queue.pop(0)
node_right=queue.pop(0)
node_father=Node(node_left.freq+node_right.freq)
node_father.left=node_left
node_father.right=node_right
node_left.father=node_father
node_right.father=node_father
queue.append(node_father)
queue[0].father=None
return queue[0]

#Huffman Encoding
def huffmanEncoding(nodes,root):
codes=['']*len(nodes)
for i in range(len(nodes)):
node_tmp=nodes[i]
while node_tmp!=root:
if node_tmp.isLeft():
codes[i]='0'+codes[i]
else:
codes[i]='1'+codes[i]
node_tmp=node_tmp.father
return codes

#generate Huffman Tree
nodes=createNodes(freqs_arr[i] for i in freqs_arr)
root=createHuffmanTree(nodes)
HuffmanCodes=huffmanEncoding(nodes,root)
print "HuffmanCodeArray is ",HuffmanCodes

#print Huffman Code Table
temp_range=0
print "Huffman Code Table:"
for i in freqs_arr:
print "Number:%s freq:%-2d encoding: %s"%(i,freqs_arr[i],HuffmanCodes[temp_range])
temp_range=temp_range+1

#use random numbers generate Huffman code
HuffmanArray=[]
for i in range(len(dice_arr)):
HuffmanArray.append(HuffmanCodes[dice_arr[i]-2])
print "HuffmanArray is ",HuffmanArray
HuffmanCode="".join(HuffmanArray)
print "HuffmanCode is ",HuffmanCode

#calc the Huffman average code length
HuffmanAveLen=0
for i in range(len(prob_arr)):
HuffmanAveLen=HuffmanAveLen+prob_arr[i]*len(HuffmanCodes[i])
print "Huffman average length is ",HuffmanAveLen

#calc the Huffman code efficiency
HuffmanEffic=informationEntropy/(HuffmanAveLen*math.log(2,2))
print "Huffman code efficiency is ",HuffmanEffic

#decode the Huffman code
lPos=0
rPos=0
HuffmanDecode=[]
while(len(HuffmanDecode)<max_range):
tempCode=HuffmanCode[lPos:rPos]
haveHuffman=0
while(haveHuffman==0):
rPos=rPos+1
for j in range(len(HuffmanCodes)):
if HuffmanCodes[j]==tempCode:
HuffmanDecode.append(j+2)
haveHuffman=1
tempCode=HuffmanCode[lPos:rPos]
lPos=rPos-1
rPos=lPos+1
print "the decode of Huffman code is ",HuffmanDecode

raw_input("press any key to quit")

用到的公式

无标题