C++ 哈夫曼树对文件压缩、加密实现代码

在以前写LZW压缩算法的时候,遇到很多难受的问题,基本上都在哈夫曼编码中解决了,虽然写这代码很费神,但还是把代码完整的码出来了,毕竟哈夫曼这个思想确实很牛逼。哈夫曼树很巧妙的解决了当时我在LZW序列化的时候想解决的问题,就是压缩后文本的分割。比如用lzw编码abc,就是1,2,3。但这个在存为文件的时候必须用分割符把1,2,3分割开,非常浪费空间,否则会和12 23 123 产生二义性。而哈夫曼树,将所有char分布在叶节点上,在还原的时候,比如1101110,假设110是叶节点,那么走到110的时候就可以确定,已经走到尽头,回到根节点继续走,这样就避免了字符的分割,全部用1010101010101这样的路径表示字符,可以将8位压缩为1个char进行存储。在构造树的时候,将出现率高的char放在上面,这样路径就很短,自然就节省了存储空间。虽然哈夫曼压缩效率不是最高的,但还算比较乐观的。

哈夫曼除了压缩以外还可以用于加密,在将文本用哈夫曼编码时,需持久化生成的char计数链表结构,这样才能还原出树结构,而解码时路径正是依赖于树结构的。也就是说,这种编码是属于约定形式的编码,在编码时用原文本产生树结构,而存储的是树路径,解码的时候缺少树或树结构与原先不相符都是无法完成解码的,就好比,我用10代表a,你存的是10,你将10解释为 b或c等等都是不正确的。由于转换为了char存储,所以还需持久化最后填充的数目、文本长度,才能还原出原先的01表示的文本格式

这个代码有一定缺陷,由于当时考虑的是对文本进行处理,当文件中有char='\0' 时会出现错误,这个代码打的很费神,就不继续修复了,如有需要,可自行更改,解决的办法应该挺多的

先来个运行图:

源代码

#include<iostream>

#include<sstream>

#include<fstream>

void WriteFile(char* path,const char* content,int length,bool append=false);

using namespace std;

struct Node{

char data;

Node* left;

Node* right;

};

struct L_Node{

int count;

Node* node;

L_Node* next;

};

Node* AddNode(int count,char data,L_Node*& first){

L_Node* lnode=new L_Node();

lnode->count=count;

Node* node=new Node();

node->data=data;

node->left=0;

node->right=0;

lnode->node=node;

if(first==0){

first=lnode;

}

else{

if(lnode->count<first->count){

lnode->next=first;

first=lnode;

}

else{

L_Node* iter=first;

while(iter->next!=0&&iter->next->count<lnode->count){

iter=iter->next;

}

if(iter->next==0){

iter->next=lnode;

lnode->next=0;

}

else{

lnode->next=iter->next;

iter->next=lnode;

}

}

}

return node;

}

void SaveLNodes(L_Node* first){

stringstream ss;

while(first!=0){

ss<<(int)(unsigned char)first->node->data<<':'<<first->count<<' ';

first=first->next;

}

WriteFile("nodes.txt",ss.str().c_str(),ss.str().length());

}

void GetLNodes(L_Node*& first){

char temp[32];

ifstream in;

in.open("nodes.txt",ios::in|ios::binary);

while(!in.eof()){

temp[0]=0;

in>>temp;

if(strlen(temp)>0){

char* data=strtok(temp,":");

char* count=strtok(0,":");

AddNode(atoi(count),atoi(data),first);

}

}

}

void BuildSortedList(char* content,L_Node*& first,int length){

int array[256]={

0

};

for(int i=0;i<length;i++){

array[(unsigned char)content[i]]++;

}

for(int i=0;i<256;i++){

if(array[i]>0){

AddNode(array[i],(char)i,first);

}

}

SaveLNodes(first);

}

void BuildTree(L_Node*& first,Node*& root){//get l1->node,l2->node,remove l1,l2,then put l3 into list,then set l3->left and l3->right

if(first->next==0){

Node* node=new Node();

root=node;

root->right=0;

node=new Node();

node->data=first->node->data;

node->left=0;

node->right=0;

root->left=node;

delete first;

return;

}

while(first->next!=0){

int count=first->count+first->next->count;

Node* node1=first->node;

L_Node* temp=first;

first=first->next;

delete temp;

Node* node2=first->node;

temp=first;

delete temp;

first=first->next;

root=AddNode(count,0,first);

root->left=node1;

root->right=node2;

//cout<<(int)node1->data<<':'<<(int)node2->data<<endl;

}

delete first;

}

void PreOrderTraversal(Node* node,char* track,int branch,char** table){

if(node!=0){

char* track2=0;

if(branch==0){

track2=new char[strlen(track)+2];

sprintf(track2,"%s0\0",track);

}

else if(branch==1){

track2=new char[strlen(track)+2];

sprintf(track2,"%s1\0",track);

}

else{

track2=new char[strlen(track)+1];

sprintf(track2,"%s\0",track);

}

if(node->data!=0){

table[(unsigned char)node->data]=track2;

}

PreOrderTraversal(node->left,track2,0,table);

PreOrderTraversal(node->right,track2,1,table);

if(node->data==0){

delete track2;

}

}

}

void PreOrderTraversal(Node* node){

if(node!=0){

cout<<(int)(unsigned char)node->data<<endl;

PreOrderTraversal(node->left);

PreOrderTraversal(node->right);

}

}

char* Encode(const char* content,char** table,int length){

stringstream ss;

for(int i=0;i<length;i++){

if((unsigned char)content[i]==0){

}

else{

ss<<table[(unsigned char)content[i]];

}

}

char* encoded_content=new char[ss.str().length()+1];

memcpy(encoded_content,ss.str().c_str(),ss.str().length());

encoded_content[ss.str().length()]=0;

return encoded_content;

}

int BinToDec(char* bin_content){

int number=0;

int cur=1;

for(int i=7;i>=0;i--){

number+=(bin_content[i]-'0')*cur;

cur*=2;

}

return number;

}

char* BinToCharText(const char* bin_content,int& fill_count,int& save_length){

int length=strlen(bin_content);

fill_count=8-length%8;

if(fill_count>0){

char* temp=new char[length+fill_count+1];

char temp1[fill_count];

for(int i=0;i<fill_count;i++){

temp1[i]='0';

}

sprintf(temp,"%s%s",bin_content,temp1);

temp[length+fill_count]=0;

bin_content=temp;

}

length+=fill_count;

save_length=length/8;

char* text=new char[length/8+1];

for(int i=0;i<length;i+=8){

char temp[8];

memcpy(temp,bin_content+i,8);

text[i/8]=(char)BinToDec(temp);

}

text[length/8]=0;

if(fill_count>0){

delete bin_content;

}

return text;

}

char* DecToBin(int num){

char* bin=new char[8];

if(num<0){

num=256+num;

}

for(int i=7;i>=0;i--){

bin[i]=num%2+'0';

num/=2;

}

return bin;

}

char* CharTextToBin(char* text,int fill_count,int save_length){

int length=save_length;

char* content=new char[8*length+1];

int pos=0;

for(int i=0;i<length;i++){

int number=text[i];

char* bin=DecToBin(number);

memcpy(content+pos,bin,8);

pos+=8;

delete bin;

}

content[8*length-fill_count]=0;

return content;

}

char* Decode(const char* encode_content,Node* tree){

stringstream ss;

Node* node=tree;

for(int i=0;i<strlen(encode_content);i++){

if(encode_content[i]=='0'){

node=node->left;

}

else if(encode_content[i]=='1'){

node=node->right;

}

if(node->data!=0){

ss<<node->data;

node=tree;

}

}

char* decode_content=new char[ss.str().length()+1];

memcpy(decode_content,ss.str().c_str(),ss.str().length());

decode_content[ss.str().length()]=0;

return decode_content;

}

void ReleaseTable(char** table){

for(int i=0;i<256;i++){

if(table[i]!=0){

delete table[i];

}

}

}

void PostOrderTraversal(Node* node){

if(node!=0){

PostOrderTraversal(node->left);

PostOrderTraversal(node->right);

delete node;

}

}

char* ReadFile(char* path,long& length){

char* content=0;

ifstream in;

in.open(path,ios::in|ios::binary);

in.seekg(0,ios::end);

length=in.tellg();

content=new char[length+1];

in.seekg(0,ios::beg);

int i=0;

while(!in.eof()){

content[i++]=in.get();

}

content[length]=0;

in.close();

return content;

}

char* ReadFile(char* path,int& fill_count,int& save_length){

char* content=0;

ifstream in;

in.open(path,ios::in|ios::binary);

in>>fill_count>>save_length;

long cur=in.tellg()+(long)1;

in.seekg(0,ios::end);

long length=in.tellg()-cur;

content=new char[length+1];

in.seekg(cur,ios::beg);

int i=0;

while(!in.eof()){

content[i++]=in.get();

}

content[length]=0;

in.close();

return content;

}

void WriteFile(char* path,const char* content,int length,bool append){

ofstream out;

if(append){

out.open(path,ios::out|ios::binary|ios::app);

}

else{

out.open(path,ios::out|ios::binary);

}

out.write(content,length);

out.close();

}

int main(){

long length;

char* content=ReadFile("content.txt",length);

L_Node* first=0;

BuildSortedList(content,first,length); //create nodes list and save to nodes file

//GetLNodes(first);//get and recreate nodes from file

Node* root=0;//used for buildtable and decode

BuildTree(first,root);//build tree by nodes list and release sorted list

char* table[256]={//build table,used for encode

0

};

PreOrderTraversal(root,"",-1,table);//create table

char* encode_content=Encode(content,table,length);//convert content to encoded bin text

cout<<encode_content<<endl;

delete content;

ReleaseTable(table);//release table when encode finished

int fill_count;

int save_length;

char* save_text=BinToCharText(encode_content,fill_count,save_length);//convert encoded bin text to char text and save these text to file

delete encode_content;

char head_info[32];

sprintf(head_info,"%d %d ",fill_count,save_length);

WriteFile("encoded_content.txt",head_info,strlen(head_info));

WriteFile("encoded_content.txt",save_text,save_length,true);

delete save_text;

save_text=ReadFile("encoded_content.txt",fill_count,save_length);//read fill_count、save_length、encoded char text from file

char* bin_text= CharTextToBin(save_text,fill_count,save_length);//convert char text to bin text

delete save_text;

char* decode_content=Decode(bin_text,root);//decode by bin_text and tree

cout<<decode_content<<endl;

delete bin_text;

delete decode_content;

PostOrderTraversal(root);//release tree

return 0;

}

以上是 C++ 哈夫曼树对文件压缩、加密实现代码 的全部内容, 来源链接: utcz.com/z/319576.html

回到顶部