哈夫曼编码实验报告.docx-资料库

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第1页.png

第1页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第2页.png

第2页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第3页.png

第3页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第4页.png

第4页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第5页.png

第5页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第6页.png

第6页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第7页.png

第7页 / 共12页

u014603203-8262943-4e003a4a37be9ae2954cd6c6247304e4.docx.pdf-第8页.png

第8页 / 共12页

一、概要设计 1.利用 C 语言对文件的处理等函数，来读取一篇文章中所含的英文字符。 2.统计每个英文字符出现的频率，将频率值作为权值。 3.构建哈夫曼树。哈夫曼树的建立步骤（1）根据给定的 n 个权值（n 为不重复字母的个数）构成一棵二叉树的集合 F，其中每棵二叉树中只有一个带权为 Wi 日根结点，其左右字树均空。（2）在 F 中选取两棵根节点的权值最小的树作为左右子树构造一棵新的二叉树，且置新的二叉树的根节点的权值为其左、右子树上根节点日权值之和。（3）再 F 中删除这两棵树，同时将新得到的二叉树加入 F 中。（4）重复步骤（2）和（3），直到 F 只含有一棵树为止。这棵树便是哈夫曼树。 4.利用从叶子到根逆向的方法求解每个字符的哈夫曼编码。 5.抽象数据类型的定义 typedef struct{ unsigned int weight; unsigned int parent,lchild,rchild; }HTNode,*HuffmanTree; typedef char** HuffmanCode;

二、详细设计（一）核心算法 1.主函数调用 article 函数函数功能：读取绝对路径为 path 的文档里的英语字符，将其存储在数组 str 中。函数实现：构建临时数组 temp，用于存储文档里的所有字符（不管是否为英文字符），将数组 temp 中的英文字符挑选出来，存在数组 str 中，函数返回英文字符的总个数。 2.主函数调用 Pro_to_weight 函数函数功能：将每个英文字符的概率值转化为权值函数实现：先统计每个字母出现的次数，存入 count 函数。计算各字母的概率值，为了方便计算，将概率扩大 100 倍，并将扩大后的结果作为权值，存入 weight 数组里。 3.主函数调用 HuffmanCoding 函数函数功能：存放 n 个字符的权值 w（均>0），构造哈夫曼树 HT, 并求出 n 个字符的哈夫曼编码 HC。函数实现：对于 n 个字符，需要构造数据类型为 HuffmanTree 长度为 2n 的数组 HT。数组 HT 的前 n 个单元用于存取各字符的权值，后 n 个单元初始化为 0。将 HT 中权值作为实参传到 Select 函数里，按先后出现顺序求出权值最小的两个数 S1，S2，传回这两个数。按照哈夫曼树的建立步骤，构造出哈夫曼树。构造数据类型为 HuffmanCode 长度为 n 的数组 HC，构造长度为 n 的字符数组 cd，从 - 1 -

cd 的倒数第二个内存单元开始（最后一个内存单元存储\0），逆向存储哈夫曼编码。 4. HuffmanCoding 函数调用 Select 函数函数功能：在 HT[1~n]中选择 parent 为 0 且 weight 最小的两个数，先出现的赋值给 s1，后出现的赋值给 s2 函数实现：备份 HT.weight,调用 BubbleSort 对 HT 中的 weight 按从小到大排序，找到最小的两个数 min1,min2，遍历备份的 HT，找到父亲节点为 0 的 min1,min2 的位置，将 min1,min2 中位置靠前者赋值给 s1，靠后者赋值给 s2 5. Select 函数调用 BubbleSort 函数函数功能：冒泡排序，将数组 a 里的所有数从小到大排序，并将前 2 个数分别赋值给 s1,s2。函数实现：采用冒泡排序的方法，求得最小的两个数，赋值给 s1,s2。（二）完整代码 #include #include #include #define error 0 #define ok 1 #define min(a,b) ((a < b) ? a : b) #define max(a,b) ((a > b) ? a : b) #define change(ch) ch + 32 //把大写字母ch转化为小写字母,并返回 #define MAX 10000 typedef struct{ unsigned int weight; unsigned int parent,lchild,rchild; }HTNode,*HuffmanTree; - 2 -

typedef char** HuffmanCode; /* **BubbleSort(*a,length,&s1,&s2) **冒泡排序，将数组a里的所有数从小到大排序，并将 **前2个数分别赋值给s1,s2 */ void BubbleSort(int *a,int length,int &s1,int &s2) for (int i = 1; i < length; i++){ for (int j = i + 1; j < length; j++){ if (a[i] > a[j]){ int temp; temp = a[j]; a[j] = a[i]; a[i] = temp; } } } s1 = a[1]; s2 = a[2]; { } /* **Select(HT,n,&s1,&s2) **在HT[1~n]中选择parent为0且weight最小的两个数， **先出现的赋值给s1，后出现的赋值给s2 */ void Select(HuffmanTree HT,int n,int &s1,int &s2) { /*程序算法设计思路： **1.备份HT.weight,冒泡法对HT中的weight按从小到大排序，找到最小的两个数min1,min2 **2.遍历备份的HT，找到min1,min2的位置 **3.将min1,min2中位置靠前者赋值给s1，靠后者赋值给s2 */ int min1,min2; int *b,temp[3]; int k = 1; b = (int*)malloc((n + 1)*sizeof(int)); if(!b) exit(0); for(int i = 1;i <= n;i++)//备份HT中的weight到数组b中 if(HT[i].parent == 0){ b[k] = HT[i].weight; k++; } - 3 -

BubbleSort(b,k,min1,min2); for(int i = 1,j = 1;i <= n && j < 3;i++) if(HT[i].parent == 0) if(min1 == HT[i].weight || min2 == HT[i].weight){ temp[j] = i; j++; } s1 = min(temp[1],temp[2]); s2 = max(temp[1],temp[2]); free (b); } /* **HuffmanCoding（&HT,&HC,*w,n） **存放n个字符的权值w（均>0），构造哈夫曼树HT, **并求出n个字符的哈夫曼编码HC */ void HuffmanCoding(HuffmanTree &HT,HuffmanCode &HC,float *w,int n) { int i,c,f,start; int s1,s2,m; HuffmanTree p; char *cd; if(n <= 1) return; m = 2 * n - 1; HT=(HuffmanTree)malloc((m+1)*sizeof(HTNode));//0单元不使用 for(i=1,p=HT+1;i<=n;i++,p++,w++){//初始化1~n; p->weight =(int)*w; p->lchild = p->parent = p->rchild = 0; } for(;i<=m;i++,p++)//初始化n+1~m p->weight = p->lchild = p->rchild = p->parent = 0; /***哈夫曼树的建立***/ for(i=n+1;i<=m;i++){ Select(HT,i-1,s1,s2); HT[s1].parent = i; HT[s2].parent = i; HT[i].lchild = s1; HT[i].rchild = s2; HT[i].weight = HT[s1].weight +HT[s2].weight; } /*****从叶子到根结点逆向求解哈夫曼树的编码****/ HC=(HuffmanCode)malloc((n+1)*sizeof(char*)); cd = (char*)malloc(n*sizeof(char)); - 4 -

cd[n-1] = '\0'; for(i=1;i<=n;i++){ start = n -1; for(c = i,f = HT[i].parent;f != 0;c = f,f = HT[f].parent) if(HT[f].lchild == c) cd[--start] = '0'; else cd[--start] = '1'; HC[i]=(char*)malloc((n-start)*sizeof(char)); strcpy(HC[i],&cd[start]); } free(cd); } /* **Pro_to_weight(*str,*weight,length) **将字符串str中的每个字母的概率值转换为权值，存入weight中 **函数返回不同单词的个数 */ int Pro_to_weight(char *str, char *ch,float *weight,int length) { static int count[26]; int temp,num_char = 1;//num_char为不重复单词的个数 for(int i = 0;i < length;i++){//统计每个单词的出现次数，存入count数组里 if(str[i] < 'a' || str[i] > 'z') str[i] = change(str[i]); temp = str[i] - 'a'; count[temp]++; } puts("每个字母出现的次数、概率如下："); printf("字母\t次数\t概率\t\t字母\t次数\t概率\n"); for(int i = 0,j = i;i < 26;i++) if(count[i] != 0){ j++; ch [num_char] = i + 'a'; weight[num_char] = count[i] /(float) length; if(j % 2 !=0) printf(" %c\t %d\t%.2f\t\t",ch[num_char],count[i],weight[num_char]); else printf(" %c\t %d\t%.2f\n",ch[num_char],count[i],weight[num_char]); weight[num_char] = weight[num_char] * 100;//为了方便计算，将概率扩大100倍 num_char++; } return num_char - 1; - 5 -

} /* **读取绝对路径为path的文档里的英语字符，将其存储在数组str中 **函数返回英语字符的个数 */ int article(FILE* fp,char* &str,char *path) { /* **算法设计 **1.构建临时数组temp，用于存储文档里的所有字符（不管是否为英文字符） **2.将数组temp中的英文字符挑选出来，存在数组str中， */ int i = 0,k = 0;//k为单词总个数 char* temp;//存取文章中的所有字符（无论是否为英文字符） fp = fopen(path,"r"); if(!fp){ printf("文件打开失败！\n"); return error; } str =(char*)malloc(MAX*sizeof(char)); temp = (char*)malloc(MAX*sizeof(char)); if(!str || !temp) { printf("分配内存失败！\n"); return error; } do{ i++; if(i >= MAX){ temp =(char*)realloc(temp,2 * MAX*sizeof(char)); if(!temp){ printf("内存扩充失败！\n"); return error; } } temp[i] = fgetc(fp); if(temp[i] >= 'a' && temp[i] <= 'z' || temp[i] >= 'A' && temp[i] <= 'Z'){ str[k] = temp[i]; k++; if(k >= MAX){ str =(char*)realloc(str,2 * MAX*sizeof(char)); if(!str){ printf("内存扩充失败！\n"); return error; - 6 -

} } } }while(temp[i]!=EOF); str[k]='\0'; fclose(fp); return k; } void main() { FILE *fp = NULL; char *str = NULL; char path[100];//用于存储文章的绝对地址 char ch[30];//ch数组用于存储不重复单词 float weight[30]; int length,num_char;//num_char为不重复单词的个数 HuffmanCode HC; HuffmanTree HT; puts("请输入文章文档的绝对路径:"); gets(path); length = article(fp,str,path); if(length == 1||length == error){ exit(0); } printf("文章中包含的英文字符为：%d\n",length); num_char = Pro_to_weight(str,ch,weight,length); HuffmanCoding(HT,HC,weight+1,num_char);//w+1，表示0单元不使用 if(num_char % 2 != 0) printf("\n"); puts("这些字母的哈夫曼编码为："); for(int i = 1;i <= num_char;i++) printf("%c = '%s'\n",ch[i],HC[i]); } 三、算法时间复杂度分析假设英语字符的总数为 n,不重复单词个数为 m，哈夫曼的深度 p 1.BubbleSort 函数：T(n) = O(n^2); - 7 -

资料库

哈夫曼编码实验报告.docx

相关推荐

课程资源

热门标签

最新资料