/*
 * Huffman coding exercise (main.c).
 *
 * Reads a text file, counts how often each character occurs, builds a
 * Huffman tree from those counts, writes the text as a stream of '0'/'1'
 * characters, decodes that stream again and finally compares the decoded
 * text with the original.
 *
 * Provenance: file added by commit 963bc3053a49 ("Create main.c",
 * 2024-06-27).
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of distinct character values (0 .. SYMBOL_LIMIT-1) that are counted. */
enum { SYMBOL_LIMIT = 127 };

/* File locations shared by the encode, decode and compare steps. */
static const char SOURCE_PATH[] = "D:\\test.txt";
static const char ENCODED_PATH[] = "D:\\text2.txt";
static const char DECODED_PATH[] = "D:\\text3.txt";

/* Huffman tree node; the tree is an array in which index 0 is unused and a
 * link value of 0 means "no parent" / "no child". */
typedef struct {
    char ch;                    /* character stored in a leaf */
    int weight;                 /* frequency (leaf) or sum of children */
    int parent, lchild, rchild; /* array indices, 0 when absent */
} HTNode, *HuffmanTree;

/* Table of NUL-terminated code strings; entry i belongs to tree leaf i. */
typedef char **HuffmanCode;

struct Char_frequency {
    char s; /* character */
    int f;  /* number of occurrences */
};

HuffmanTree HT; /* global Huffman tree */
HuffmanCode HC; /* global Huffman code table */
int count1;     /* number of distinct characters with a non-zero count */

void Select(HuffmanTree HT, int end, int *s1, int *s2);
void creat_HuffmanTree(HuffmanTree *HT, HuffmanCode *HC, struct Char_frequency *w, int n);
int calculate_frequency(void);
void write_encoded_text(struct Char_frequency* array, HuffmanCode HC, int count1);
void decode_text(HuffmanTree HT, int count1);
void compare_files(void);

/* Frees code strings 1..n of a code table and then the table itself. */
static void free_code_table(HuffmanCode codes, int n)
{
    if (codes == NULL) {
        return;
    }
    for (int i = 1; i <= n; i++) {
        free(codes[i]);
    }
    free(codes);
}

/* Releases the global tree and code table built by calculate_frequency(). */
static void release_huffman_data(void)
{
    free_code_table(HC, count1);
    free(HT);
    HC = NULL;
    HT = NULL;
}

/*
 * Program entry point: analyse and encode the source file, then decode the
 * result and compare it with the original. Always returns 0.
 */
int main()
{
    if (calculate_frequency() != 0) {
        decode_text(HT, count1);
        compare_files();
    }
    release_huffman_data();
    return 0;
}

/*
 * Writes the Huffman-encoded form of the source file to the encoded file.
 *
 * array  - the count1 characters that have a code; array[i] uses HC[i + 1]
 * HC     - code table (entries 1..count1)
 * count1 - number of entries in array
 *
 * Characters of the source file that have no entry in array are skipped.
 * On failure to open either file a message is printed to stderr and nothing
 * is written.
 */
void write_encoded_text(struct Char_frequency* array, HuffmanCode HC, int count1)
{
    /* Byte-indexed lookup table so that each input character is resolved in
     * constant time instead of scanning the whole array. */
    const char *code_of[UCHAR_MAX + 1] = {0};

    if (array != NULL && HC != NULL) {
        for (int i = 0; i < count1; i++) {
            unsigned char symbol = (unsigned char)array[i].s;
            if (code_of[symbol] == NULL) { /* first entry wins */
                code_of[symbol] = HC[i + 1];
            }
        }
    }

    FILE *inputFile = fopen(SOURCE_PATH, "r");
    if (inputFile == NULL) {
        fprintf(stderr, "无法打开文件\n");
        return;
    }
    FILE *outputFile = fopen(ENCODED_PATH, "w");
    if (outputFile == NULL) {
        fprintf(stderr, "无法打开文件\n");
        fclose(inputFile);
        return;
    }

    /* fgetc returns an int so that EOF stays distinguishable from every
     * valid byte value; storing it in a char would lose that distinction. */
    int ch;
    while ((ch = fgetc(inputFile)) != EOF) {
        const char *code = code_of[(unsigned char)ch];
        if (code != NULL) {
            fputs(code, outputFile);
        }
    }

    fclose(inputFile);
    fclose(outputFile);
}

/*
 * Finds the two parentless nodes with the smallest weights among
 * HT[1..end].
 *
 * *s1 receives the index of the lightest node and *s2 the index of the
 * second lightest; on equal weights the lower index is preferred. An output
 * is set to -1 when no such node exists.
 */
void Select(HuffmanTree HT, int end, int *s1, int *s2)
{
    int min1 = -1, min2 = -1;

    for (int i = 1; i <= end; i++) {
        if (HT[i].parent != 0) {
            continue; /* already merged into a subtree */
        }
        if (min1 == -1 || HT[i].weight < HT[min1].weight) {
            min2 = min1; /* previous best becomes the runner-up */
            min1 = i;
        } else if (min2 == -1 || HT[i].weight < HT[min2].weight) {
            min2 = i;
        }
    }
    *s1 = min1;
    *s2 = min2;
}

/*
 * Builds a Huffman tree and its code table from n character frequencies.
 *
 * HT - receives the tree: 2n-1 nodes stored at indices 1..2n-1, leaves
 *      first (1..n, in the order of w), root last
 * HC - receives the code table: entry i (1..n) is the code of leaf i
 * w  - the n characters and their frequencies
 * n  - number of entries in w
 *
 * Each character and its code is printed to stdout. When n < 1, w is NULL
 * or an allocation fails, *HT and *HC are both set to NULL. The caller owns
 * the returned tree, the table and every code string.
 */
void creat_HuffmanTree(HuffmanTree *HT, HuffmanCode *HC, struct Char_frequency *w, int n)
{
    *HT = NULL;
    *HC = NULL;
    if (n < 1 || w == NULL) {
        return;
    }

    int m = 2 * n - 1;
    /* calloc zeroes every link, i.e. all nodes start without parent/children. */
    HuffmanTree tree = calloc((size_t)m + 1, sizeof *tree);
    HuffmanCode codes = calloc((size_t)n + 1, sizeof *codes);
    /* A code has at most n-1 bits; one extra slot covers the single-symbol
     * case and one holds the terminator. */
    char *cd = malloc((size_t)n + 1);
    if (tree == NULL || codes == NULL || cd == NULL) {
        free(tree);
        free(codes);
        free(cd);
        return;
    }

    /* Leaves: the frequency array is 0-based, the tree is 1-based. */
    for (int i = 1; i <= n; i++) {
        tree[i].ch = w[i - 1].s;
        tree[i].weight = w[i - 1].f;
    }

    /* Repeatedly merge the two lightest parentless nodes into a new node. */
    for (int i = n + 1; i <= m; i++) {
        int s1, s2;
        Select(tree, i - 1, &s1, &s2);
        tree[s1].parent = i;
        tree[s2].parent = i;
        tree[i].lchild = s1;
        tree[i].rchild = s2;
        tree[i].weight = tree[s1].weight + tree[s2].weight;
    }

    /* Derive each code by walking from the leaf up to the root, filling the
     * scratch buffer from its end towards its start. */
    cd[n] = '\0';
    for (int i = 1; i <= n; i++) {
        int start = n; /* index of the first code character so far */
        for (int c = i, f = tree[c].parent; f != 0; c = f, f = tree[f].parent) {
            cd[--start] = (tree[f].lchild == c) ? '0' : '1';
        }
        if (start == n) {
            /* Lone symbol: the leaf is the root, so the walk yields no bits.
             * Give it a one-bit code so the text can still be encoded. */
            cd[--start] = '0';
        }

        size_t length = (size_t)(n - start);
        codes[i] = malloc(length + 1);
        if (codes[i] == NULL) {
            free_code_table(codes, n);
            free(tree);
            free(cd);
            return;
        }
        memcpy(codes[i], &cd[start], length + 1);
        printf("字符: %c 哈夫曼编码: %s\n", tree[i].ch, codes[i]);
    }

    free(cd);
    *HT = tree;
    *HC = codes;
}

/*
 * Counts the characters of the source file, builds the global Huffman tree
 * and code table (HT, HC, count1) and writes the encoded file.
 *
 * The file contents, the total character count and the per-character counts
 * are printed to stdout.
 *
 * NOTE: only character values below SYMBOL_LIMIT are counted per character;
 * other values are included in the total but receive no code, so they are
 * missing from the encoded output.
 *
 * Returns 1 on success and 0 when the source file cannot be opened or memory
 * cannot be allocated.
 */
int calculate_frequency(void)
{
    int frequency[SYMBOL_LIMIT] = {0}; /* indexed by character value */
    int count = 0;
    int ch;

    count1 = 0;

    FILE *file = fopen(SOURCE_PATH, "r");
    if (file == NULL) {
        fprintf(stderr, "无法打开文件\n");
        return 0;
    }
    while ((ch = fgetc(file)) != EOF) {
        printf("%c", ch);
        if (ch >= 0 && ch < SYMBOL_LIMIT) {
            frequency[ch]++;
        }
        count++;
    }
    fclose(file);

    printf("原文字符数:%d\n", count);

    for (int i = 0; i < SYMBOL_LIMIT; i++) {
        printf("%c:%d ", i, frequency[i]);
        if (frequency[i] != 0) {
            count1++;
        }
    }
    printf("\n");
    printf("非零字符类型:%d\n", count1);

    /* Compact the table to the characters that actually occur. At least one
     * slot is requested so an empty file does not turn into malloc(0). */
    size_t slots = count1 > 0 ? (size_t)count1 : 1;
    struct Char_frequency *array = malloc(slots * sizeof *array);
    if (array == NULL) {
        fprintf(stderr, "内存分配失败\n");
        return 0;
    }
    int j = 0;
    for (int i = 0; i < SYMBOL_LIMIT; i++) {
        if (frequency[i] != 0) {
            array[j].f = frequency[i];
            array[j].s = (char)i;
            j++;
        }
    }
    for (int i = 0; i < count1; i++) {
        printf("%c:%d ", array[i].s, array[i].f);
    }
    printf("\n");

    creat_HuffmanTree(&HT, &HC, array, count1);
    if (count1 > 0 && (HT == NULL || HC == NULL)) {
        fprintf(stderr, "内存分配失败\n");
        free(array);
        return 0;
    }

    write_encoded_text(array, HC, count1);

    free(array);

    /* Non-zero tells main() that decoding and comparing can proceed. */
    return 1;
}

/*
 * Decodes the encoded file back into text and writes it to the decoded
 * file.
 *
 * HT     - Huffman tree whose root is at index 2 * count1 - 1
 * count1 - number of leaves in the tree
 *
 * Characters other than '0' and '1' in the encoded file are ignored. With a
 * NULL tree or count1 < 1 an empty output file is produced. On failure to
 * open either file a message is printed to stderr.
 */
void decode_text(HuffmanTree HT, int count1)
{
    FILE *encodedFile = fopen(ENCODED_PATH, "r");
    if (encodedFile == NULL) {
        fprintf(stderr, "无法打开文件\n");
        return;
    }
    FILE *outputFile = fopen(DECODED_PATH, "w");
    if (outputFile == NULL) {
        fprintf(stderr, "无法打开文件\n");
        fclose(encodedFile);
        return;
    }

    if (HT != NULL && count1 >= 1) {
        const int root = 2 * count1 - 1;
        int current = root;
        int bit; /* int, so EOF cannot collide with a byte value */

        while ((bit = fgetc(encodedFile)) != EOF) {
            if (bit != '0' && bit != '1') {
                continue;
            }
            /* Descend only from an internal node. When the root itself is a
             * leaf (single-symbol tree) the bit is consumed in place. */
            if (HT[current].lchild != 0 || HT[current].rchild != 0) {
                current = (bit == '0') ? HT[current].lchild : HT[current].rchild;
            }
            if (HT[current].lchild == 0 && HT[current].rchild == 0) {
                fputc(HT[current].ch, outputFile);
                current = root; /* start over for the next character */
            }
        }
    }

    fclose(encodedFile);
    fclose(outputFile);
}

/*
 * Compares the source file with the decoded file character by character and
 * prints whether they are identical, differ in content or differ in length.
 * On failure to open either file a message is printed to stderr.
 */
void compare_files(void)
{
    FILE *file1 = fopen(SOURCE_PATH, "r");
    if (file1 == NULL) {
        fprintf(stderr, "无法打开文件\n");
        return;
    }
    FILE *file2 = fopen(DECODED_PATH, "r");
    if (file2 == NULL) {
        fprintf(stderr, "无法打开文件\n");
        fclose(file1);
        return;
    }

    for (;;) {
        /* Read from both files on every step; otherwise the end of the
         * second file would never be observed when the first ends. */
        int ch1 = fgetc(file1);
        int ch2 = fgetc(file2);

        if (ch1 == EOF || ch2 == EOF) {
            if (ch1 == EOF && ch2 == EOF) {
                printf("文件完全一致\n");
            } else {
                printf("文件长度不同\n");
            }
            break;
        }
        if (ch1 != ch2) {
            printf("文件存在不同\n");
            break;
        }
    }

    fclose(file1);
    fclose(file2);
}