Southwest Jiaotong University Data Structure Experiment: Implementation of Huffman Codec

Experiment content and requirements:

Read several uppercase English characters from the character file (the number of English character types m is recommended to be 6 to 8, such as: m=6, then the English characters can be A-F), count the occurrence frequency of m English characters, and construct a Huffman binary tree, Perform Huffman encoding on all English characters, and store the encoded bit stream in a byte (or char) array. Output the compression rate of the bit stream on the screen, and then use the array and the Huffman binary tree to decode, and output the decoded character sequence to another character file.

Tips: (1) The input and output character files are lined with 10 characters each;

(2) Non-displayable characters (such as: carriage return, line feed) in the input file are not counted and encoded;

(3) Compression rate = average code length/log2m.

Purpose of the experiment: Master Huffman binary tree and Huffman encoding and decoding.

Experiment content and requirements:

Read several uppercase English characters from the character file (the number of English character types m is recommended to be 6 to 8, such as: m=6, then the English characters can be A-F), count the occurrence frequency of m English characters, and construct a Huffman binary tree, Perform Huffman encoding on all English characters, and store the encoded bit stream in a byte (or char) array. Output the compression rate of the bit stream on the screen, and then use the array and the Huffman binary tree to decode, and output the decoded character sequence to another character file.

Tips: (1) The input and output character files are lined with 10 characters each;

(2) Non-displayable characters (such as: carriage return, line feed) in the input file are not counted and encoded;

(3) Compression rate = average code length/log2m.

Purpose of the experiment: Master Huffman binary tree and Huffman encoding and decoding.

A brief description of the algorithm design:

 

Input/Output Design Brief Description:

Input and output character files line every 10 characters.

The output has a text prompt.

Test case (no screenshot here):

Read several uppercase English characters from the file read.txt, and read the binary sequence that needs to be decoded from read1.txt:

AAAAAAAABC,111111110001

Output the decoded character sequence to write.txt:

AAAAAAAABC

Compression ratio: 0.757116

code:

#include <iostream>
#include <fstream>
#include <vector>
#include <cmath>
#define READ_ROOT "read.txt"
#define READ_decode "read1.txt"
#define WRITE_ROOT "write.txt"

//Maximum number of characters
#define MAX_TEXT 1000
//Maximum number of different characters
#define MAX_LEAF 26
//Up to 26 letters, so use at least five characters to represent
#define MAX_BITS (MAX_LEAF - 1)
//Maximum number of nodes
#define MAX_NODE (MAX_LEAF * 2 - 1)
using namespace std;

typedef struct HTNode {
    double weight;
    int parent;
    int lchild;
    int rchild;
}HFNode;

typedef struct HCode {
    char code[MAX_BITS]{};//symbol encoding
    int start;//The starting coordinates encoded in code[]
    int length{0};//code length
}HFCode;

class Huffman {
private:
    fstream file;
    //Number of character types
    int kinds{0};
    //source text
    char src_text[MAX_TEXT]{};
    //all occurrences of the source text
    vector<char> text;
    //Frequency
    vector<int> count;
    //frequency (weight)
    vector<double> weight;
    //The index of the node with the smallest weight
    int s1{0}, s2{0};
    HFNode hfNode[MAX_NODE]{};//code tree
    HFCode hfCode[MAX_LEAF]{};//code list
public:
    Huffman() = default;
    //file read
    bool DataRead(const string& root);
    //Statistical frequency. Calculated frequency
    void Statistics();
    //Choose the two nodes with the smallest weight
    void Select(int range);
    //Build a Huffman tree
    bool createHFTree();
    //Build a Huffman code
    void createHFCode();
    //Find the compression ratio
    void solveCompressibility();
    //Output Huffman encoding
    bool writeHFCode(const string& root);
    //decoding
    void dec(char receive[], char decoded[]);
    void visit();
    ~Huffman() = default;

};
void Huffman::visit()
{
    for (int i = 0; i< text.size(); i++)
    {
        cout << text[i]<<" ";
    }
}
//file read
bool Huffman::DataRead(const std::string& root) {
    file.open(root, ios::in);
    if (!file.is_open()) {
        cout << "Wrong Root!" << endl;
        return false;
    }
    else {
        int i = 0;
        while (file >> src_text[i++]);
        file.close();
        return true;
    }
}
//Statistical frequency. Calculated frequency
void Huffman::Statistics() {
    int sum = 0;
    for (auto& letter : src_text) {
        if (letter >= 'A' && letter <= 'Z') {
            bool exist = false;
            sum++;//Total frequency +1
            for (auto& i : text) {
                if (letter == i) {
                    exist = true;
                    break;
                }
            }
            if (!exist) {
                text.push_back(letter);//Letters that did not appear before are added to the sequence of characters that appear
                count.push_back(1);//Corresponding frequency initialization
                kinds++;//Number of character types +1
            }
            else {
                for (int i = 0; i < text.size(); i++) {
                    if (text[i] == letter) {
                        count[i]++;//Already appeared, corresponding frequency +1
                        break;
                    }
                }
            }
        }
    }
    for (int i : count) {
        double temp = static_cast<double>(i) / static_cast<double>(sum);
        weight.push_back(temp);
    }
}
//Choose the two nodes with the smallest weight
void Huffman::Select(int range) {
    double  min1 = 1;
    for (int i = 0; i < range; i++) {
        if (hfNode[i].weight < min1 && hfNode[i].parent == -1) {
            min1 = hfNode[i].weight;
            s1 = i;
        }
    }
    double min2 = 1;
    for (int i = 0; i < range; i++) {
        if (hfNode[i].weight < min2 && hfNode[i].parent == -1 && i != s1) {
            min2 = hfNode[i].weight;
            s2 = i;
        }
    }
}
//Build a Huffman tree
bool Huffman::createHFTree() {
    if (kinds > MAX_LEAF||kinds <= 1) {
        cout << "Wrong Range!" << endl;
        return false;
    }
    //The code tree has a total of 2 * kinds - 1 nodes
    int m = 2 * kinds - 1;
   //-1 means a null pointer, initialize all node parent s to -1
    for (int i = 0; i <m; i++) {
        if(i<kinds)  hfNode[i].weight = weight[i];//The first kinds are all letter nodes
        else         hfNode[i].weight = 0;//The following kinds-1 are the root nodes of the merged subtree
        hfNode[i].parent = hfNode[i].lchild = hfNode[i].rchild = -1;
    }
    for (int i = kinds; i < m; i++) {
        Select(i);
        hfNode[s1].parent =hfNode[s2].parent = i;
        hfNode[i].rchild = s2;
        hfNode[i].lchild = s1;
        hfNode[i].weight = hfNode[s1].weight + hfNode[s2].weight;
    }
    return true;
}
//Build a Huffman code
void Huffman::createHFCode() {
    int start;
    for (int i = 0; i < kinds; i++) {
        start = kinds-2;
        for (int c = i, f = hfNode[i].parent; f != -1; c = f, f = hfNode[f].parent) {
            if (c == hfNode[f].lchild)  hfCode[i].code[start--] = '0';
            else   hfCode[i].code[start--] = '1';
            hfCode[i].length++;
        }
        hfCode[i].start = start + 1;//Coding start coordinates
    }
}
//Find the compression ratio
void Huffman::solveCompressibility() {
    double average_length = 0;
    for (int i = 0; i < kinds; i++) {
        int length = hfCode[i].length;
        average_length += static_cast<double>(length) * weight[i];
    }
    cout << "Compression ratio: " << average_length / log2(kinds) << endl;
}
//Output Huffman code, for testing
bool Huffman::writeHFCode(const std::string& root) {
    file.open(root, ios::out);
    if (!file.is_open()) {
        cout << "Wrong Root!" << endl;
        return false;
    }
    else {
        //Output the Huffman encoding of each letter
        for (int i = 0; i < kinds; i++) {
            file << text[i] << ":";
            for (int j = hfCode[i].start; j < kinds - 1; j++) {
                file << hfCode[i].code[j];
            }
            file << endl;
        }
        file << endl;
        //Huffman encoding of the output source string
        int counter = 0;
        for (int i = 0; src_text[i] != '\0'; i++) {
            for (int j = 0; j < text.size(); j++) {
                if (src_text[i] == text[j]) {
                    for (int k = hfCode[j].start; k < kinds - 1; k++) {
                        file << hfCode[j].code[k];
                        counter++;
                        if (counter == 10) {
                            file << endl;
                            counter = 0;
                        }
                    }
                    break;
                }
            }
        }
        return true;
    }
}

//decoding
void Huffman::dec(char receive[], char decoded[])
{
    file.open(READ_decode, ios::in);
    int m = 0,n=0; int counter = 0;
    while (file >> receive[m++]);
    file.close();
    receive[m] = '\0';
    int i = 0,j = 0;
    while (receive[i] != '\0')
    {
        int  k = 2 * kinds - 2; //k points to the root node
        while (k >= kinds && receive[i] != '\0')
            if (receive[i++] == '0') k = hfNode[k].lchild;
            else k = hfNode[k].rchild;
        if (k < kinds) decoded[j++] = text[k]; //output a symbol
        //If k<kinds, then k points to a leaf node
    }
    decoded[j] = '\0';
    file.open(WRITE_ROOT, ios::out);
    while (decoded[n]!='\0')
    {
        file << decoded[n++];
        counter++;
        if (counter == 10) {
            file << endl;
            counter = 0;
        }
    };
    file.close();
}


int main() {
    Huffman huffman;
    char receive[MAX_TEXT]{};
    char decoded[MAX_TEXT]{};
    if (!huffman.DataRead(READ_ROOT)) {
        return -1;
    }
    huffman.Statistics();
    if (!huffman.createHFTree()) {
        return -1;
    }
    huffman.createHFCode();
    huffman.solveCompressibility();
    /*if (!huffman.writeHFCode(WRITE_ROOT)) {
        return -1;
    }*/
    huffman.dec(receive,decoded);

   /* for (int i = 0; decoded[i] != '\0'; i++)
    {
        cout << decoded[i];
    }*/
    /*for (int i = 0; receive[i] != '\0'; i++)
    {
        cout << receive[i];
    }*/
    return 0;
}

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

Reference (basically modified and added on the basis of the original): The Seventh Experimental Report on Data Structure of Southwest Jiaotong University--The Implementation of Huffman Codec_Jellyfish Knight's Blog-CSDN Blog

 

Tags: data structure

Posted by lpxxfaintxx on Sat, 01 Apr 2023 01:47:09 +1030