Experiment content and requirements:
Read several uppercase English characters from the character file (the number of English character types m is recommended to be 6 to 8, such as: m=6, then the English characters can be A-F), count the occurrence frequency of m English characters, and construct a Huffman binary tree, Perform Huffman encoding on all English characters, and store the encoded bit stream in a byte (or char) array. Output the compression rate of the bit stream on the screen, and then use the array and the Huffman binary tree to decode, and output the decoded character sequence to another character file.
Tips: (1) The input and output character files are lined with 10 characters each;
(2) Non-displayable characters (such as: carriage return, line feed) in the input file are not counted and encoded;
(3) Compression rate = average code length/log2m.
Purpose of the experiment: Master Huffman binary tree and Huffman encoding and decoding.
Experiment content and requirements:
Read several uppercase English characters from the character file (the number of English character types m is recommended to be 6 to 8, such as: m=6, then the English characters can be A-F), count the occurrence frequency of m English characters, and construct a Huffman binary tree, Perform Huffman encoding on all English characters, and store the encoded bit stream in a byte (or char) array. Output the compression rate of the bit stream on the screen, and then use the array and the Huffman binary tree to decode, and output the decoded character sequence to another character file.
Tips: (1) The input and output character files are lined with 10 characters each;
(2) Non-displayable characters (such as: carriage return, line feed) in the input file are not counted and encoded;
(3) Compression rate = average code length/log2m.
Purpose of the experiment: Master Huffman binary tree and Huffman encoding and decoding.
A brief description of the algorithm design:
Input/Output Design Brief Description:
Input and output character files line every 10 characters.
The output has a text prompt.
Test case (no screenshot here):
Read several uppercase English characters from the file read.txt, and read the binary sequence that needs to be decoded from read1.txt:
AAAAAAAABC,111111110001
Output the decoded character sequence to write.txt:
AAAAAAAABC
Compression ratio: 0.757116
code:
#include <iostream> #include <fstream> #include <vector> #include <cmath> #define READ_ROOT "read.txt" #define READ_decode "read1.txt" #define WRITE_ROOT "write.txt" //Maximum number of characters #define MAX_TEXT 1000 //Maximum number of different characters #define MAX_LEAF 26 //Up to 26 letters, so use at least five characters to represent #define MAX_BITS (MAX_LEAF - 1) //Maximum number of nodes #define MAX_NODE (MAX_LEAF * 2 - 1) using namespace std; typedef struct HTNode { double weight; int parent; int lchild; int rchild; }HFNode; typedef struct HCode { char code[MAX_BITS]{};//symbol encoding int start;//The starting coordinates encoded in code[] int length{0};//code length }HFCode; class Huffman { private: fstream file; //Number of character types int kinds{0}; //source text char src_text[MAX_TEXT]{}; //all occurrences of the source text vector<char> text; //Frequency vector<int> count; //frequency (weight) vector<double> weight; //The index of the node with the smallest weight int s1{0}, s2{0}; HFNode hfNode[MAX_NODE]{};//code tree HFCode hfCode[MAX_LEAF]{};//code list public: Huffman() = default; //file read bool DataRead(const string& root); //Statistical frequency. Calculated frequency void Statistics(); //Choose the two nodes with the smallest weight void Select(int range); //Build a Huffman tree bool createHFTree(); //Build a Huffman code void createHFCode(); //Find the compression ratio void solveCompressibility(); //Output Huffman encoding bool writeHFCode(const string& root); //decoding void dec(char receive[], char decoded[]); void visit(); ~Huffman() = default; }; void Huffman::visit() { for (int i = 0; i< text.size(); i++) { cout << text[i]<<" "; } } //file read bool Huffman::DataRead(const std::string& root) { file.open(root, ios::in); if (!file.is_open()) { cout << "Wrong Root!" << endl; return false; } else { int i = 0; while (file >> src_text[i++]); file.close(); return true; } } //Statistical frequency. Calculated frequency void Huffman::Statistics() { int sum = 0; for (auto& letter : src_text) { if (letter >= 'A' && letter <= 'Z') { bool exist = false; sum++;//Total frequency +1 for (auto& i : text) { if (letter == i) { exist = true; break; } } if (!exist) { text.push_back(letter);//Letters that did not appear before are added to the sequence of characters that appear count.push_back(1);//Corresponding frequency initialization kinds++;//Number of character types +1 } else { for (int i = 0; i < text.size(); i++) { if (text[i] == letter) { count[i]++;//Already appeared, corresponding frequency +1 break; } } } } } for (int i : count) { double temp = static_cast<double>(i) / static_cast<double>(sum); weight.push_back(temp); } } //Choose the two nodes with the smallest weight void Huffman::Select(int range) { double min1 = 1; for (int i = 0; i < range; i++) { if (hfNode[i].weight < min1 && hfNode[i].parent == -1) { min1 = hfNode[i].weight; s1 = i; } } double min2 = 1; for (int i = 0; i < range; i++) { if (hfNode[i].weight < min2 && hfNode[i].parent == -1 && i != s1) { min2 = hfNode[i].weight; s2 = i; } } } //Build a Huffman tree bool Huffman::createHFTree() { if (kinds > MAX_LEAF||kinds <= 1) { cout << "Wrong Range!" << endl; return false; } //The code tree has a total of 2 * kinds - 1 nodes int m = 2 * kinds - 1; //-1 means a null pointer, initialize all node parent s to -1 for (int i = 0; i <m; i++) { if(i<kinds) hfNode[i].weight = weight[i];//The first kinds are all letter nodes else hfNode[i].weight = 0;//The following kinds-1 are the root nodes of the merged subtree hfNode[i].parent = hfNode[i].lchild = hfNode[i].rchild = -1; } for (int i = kinds; i < m; i++) { Select(i); hfNode[s1].parent =hfNode[s2].parent = i; hfNode[i].rchild = s2; hfNode[i].lchild = s1; hfNode[i].weight = hfNode[s1].weight + hfNode[s2].weight; } return true; } //Build a Huffman code void Huffman::createHFCode() { int start; for (int i = 0; i < kinds; i++) { start = kinds-2; for (int c = i, f = hfNode[i].parent; f != -1; c = f, f = hfNode[f].parent) { if (c == hfNode[f].lchild) hfCode[i].code[start--] = '0'; else hfCode[i].code[start--] = '1'; hfCode[i].length++; } hfCode[i].start = start + 1;//Coding start coordinates } } //Find the compression ratio void Huffman::solveCompressibility() { double average_length = 0; for (int i = 0; i < kinds; i++) { int length = hfCode[i].length; average_length += static_cast<double>(length) * weight[i]; } cout << "Compression ratio: " << average_length / log2(kinds) << endl; } //Output Huffman code, for testing bool Huffman::writeHFCode(const std::string& root) { file.open(root, ios::out); if (!file.is_open()) { cout << "Wrong Root!" << endl; return false; } else { //Output the Huffman encoding of each letter for (int i = 0; i < kinds; i++) { file << text[i] << ":"; for (int j = hfCode[i].start; j < kinds - 1; j++) { file << hfCode[i].code[j]; } file << endl; } file << endl; //Huffman encoding of the output source string int counter = 0; for (int i = 0; src_text[i] != '\0'; i++) { for (int j = 0; j < text.size(); j++) { if (src_text[i] == text[j]) { for (int k = hfCode[j].start; k < kinds - 1; k++) { file << hfCode[j].code[k]; counter++; if (counter == 10) { file << endl; counter = 0; } } break; } } } return true; } } //decoding void Huffman::dec(char receive[], char decoded[]) { file.open(READ_decode, ios::in); int m = 0,n=0; int counter = 0; while (file >> receive[m++]); file.close(); receive[m] = '\0'; int i = 0,j = 0; while (receive[i] != '\0') { int k = 2 * kinds - 2; //k points to the root node while (k >= kinds && receive[i] != '\0') if (receive[i++] == '0') k = hfNode[k].lchild; else k = hfNode[k].rchild; if (k < kinds) decoded[j++] = text[k]; //output a symbol //If k<kinds, then k points to a leaf node } decoded[j] = '\0'; file.open(WRITE_ROOT, ios::out); while (decoded[n]!='\0') { file << decoded[n++]; counter++; if (counter == 10) { file << endl; counter = 0; } }; file.close(); } int main() { Huffman huffman; char receive[MAX_TEXT]{}; char decoded[MAX_TEXT]{}; if (!huffman.DataRead(READ_ROOT)) { return -1; } huffman.Statistics(); if (!huffman.createHFTree()) { return -1; } huffman.createHFCode(); huffman.solveCompressibility(); /*if (!huffman.writeHFCode(WRITE_ROOT)) { return -1; }*/ huffman.dec(receive,decoded); /* for (int i = 0; decoded[i] != '\0'; i++) { cout << decoded[i]; }*/ /*for (int i = 0; receive[i] != '\0'; i++) { cout << receive[i]; }*/ return 0; }
Reference (basically modified and added on the basis of the original): The Seventh Experimental Report on Data Structure of Southwest Jiaotong University--The Implementation of Huffman Codec_Jellyfish Knight's Blog-CSDN Blog