#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#include <string>
#include <queue>
#include <cmath>
#include <cassert>
#define SIZE 301
#define CTRLCHAR 300
using namespace std;
struct node {
int weight;
//unsigned char value;
int value;
const node *child0;
const node *child1;
node( int c = 0, int i = -1 ) {
value = c;
weight = i;
child0 = 0;
child1 = 0;
}
node( const node* c0, const node *c1 ) {
value = 0;
weight = c0->weight + c1->weight;
child0 = c0;
child1 = c1;
}
//overloading > to be used by the priority_queue
bool operator>( const node &a ) const {
return weight > a.weight;
}
void traverse( vector<string> & ,ostream & ,string code = "") const;
};
//_______________________________global vars______________________________________________
typedef priority_queue<node,vector<node>,greater<node> > minHeap;
minHeap q;
string endCtrl;
int dicSize=0;
vector<pair <char,int> > charWeight;
//_______________________functions prototypes_______________________________________________
long getFileSize(ifstream & file,streamoff i);
long getFileSize(ifstream & file);
bool bufferRead( ifstream& input,int& readSize ,char* buffer,streamoff i,bool flag);
bool bufferRead( ifstream& input,int& readSize ,char* buffer);
void count_chars(vector<int>& counts,int& readSize,char *buffer);
void makeTree(minHeap & q);
unsigned char toChar(string tmp);
void writeBit(char bit,bool nextReadFlag,string & endSt,ostream & fout);
void dec2binHelp(int x,string & result);
string dec2bin(int x);
void makeBinary(char * buffer,int size,vector<string> & codes);
void encode(char * buffer,const int & size,ofstream & fout);
string makeOutputName(string file,string &format);
string makeOutforDec(string fileName,string format);
//_______________________________________________________________________
void node::traverse(vector<string>& stArray,ostream& fout, string code) const
{
if ( child0 ) {
child0->traverse(stArray,fout,code + "0" );
child1->traverse(stArray,fout,code + "1" );
} else
{
switch (value){
case ' ':
cout <<left<<"sp"<<":";
break;
case '\n':
cout<<left<<"En"<<":";
break;
case '\t':
cout<<left<<"Tab"<<":";
break;
default:
if(value != CTRLCHAR)
cout<<left<<(unsigned char)value<<":";
else
cout<<left<<"CTRL:";
}
if(value != CTRLCHAR)
{
stArray[value]=code;
dicSize++;
}
else
stArray[CTRLCHAR]=code;
if(value != CTRLCHAR)
cout<<weight<< " code:"<<right<<code<<endl;
else
cout<< " code:"<<right<<code<<endl;
}
}
//____________________________________getFileSize_____________________________
long getFileSize(ifstream & file,streamoff i)
{
file.seekg(0,ios::end);
long fileSize=file.tellg();
file.seekg(i);
return fileSize;
}
//___________________________________getFileSize________________________________
long getFileSize(ifstream & file)
{
file.seekg(0,ios::end);
long fileSize=file.tellg();
file.seekg(0);
return fileSize;
}
//___________________________________bufferRead_______________________________
bool bufferRead( ifstream& input,int& readSize ,char* buffer,streamoff i,bool flag)
{
static long fileSize=getFileSize(input,i);
static int counter=0;
if (flag==true)
fileSize-=i;
if (readSize<=fileSize-counter*readSize){
counter++;
input.read(buffer,readSize);
return true;
}else
{
readSize=fileSize-counter*readSize;
counter=0;
input.read(buffer,readSize);
return false;
}
}
//_____________________________________bufferRead_______________________________
bool bufferRead( ifstream& input,int& readSize ,char* buffer)
{
static long fileSize=getFileSize(input);
static int counter=0;
if (readSize<=fileSize-counter*readSize){
counter++;
input.read(buffer,readSize);
return true;
}else
{
readSize=fileSize-counter*readSize;
counter=0;
fileSize=getFileSize(input);
input.read(buffer,readSize);
return false;
}
}
//__________________________________count_chars_________________________________
void count_chars(vector<int>& counts,int& readSize,char *buffer)
{
for (int i=0;i<readSize;++i)
{
char cht;
int tmp;
tmp=buffer[i];
if(tmp<0)
tmp+=256;
cht=tmp;
counts[tmp]++;
}
}
//_____________________________________maketree_________________________________
void makeTree(minHeap & q)
{
while ( q.size() > 1 ) {
node *child0 = new node( q.top() );
q.pop();
node *child1 = new node( q.top() );
q.pop();
q.push( node( child0, child1 ) );
}
}
//_____________________________________makeTree_________________________________
void makeTree(const vector<int> counts ,
priority_queue< node,vector<node>,greater<node> > & q)
{
node tmpNode;
for ( int i = 0 ; i < SIZE ; i++ )
if ( counts[ i ] )
{
assert(i != 300);
tmpNode=node( i, counts[ i ] );
q.push( tmpNode );
charWeight.push_back(make_pair(tmpNode.value,tmpNode.weight));
}
q.push(node (CTRLCHAR,0));
//making huffman tree
while ( q.size() > 1 ) {
node *child0 = new node( q.top() );
q.pop();
node *child1 = new node( q.top() );
q.pop();
q.push( node( child0, child1 ) );
}
}
//____________________________________toChar___________________________________
unsigned char toChar(string tmp)
{
//static ofstream fout("outcheck");
int sum=0,count=0;
for(int i=tmp.size()-1 ; i >=0 ;--i,++count)
sum+=(tmp[i]-'0')*pow(2.0,count);
//fout<<sum<<endl;
return static_cast<unsigned char> (sum);
}
//___________________________________writeBit___________________________________
void writeBit(char bit,bool nextReadFlag,string & endSt,ostream & fout)
{
static string asciiBuf="";
static int count=0;
unsigned char ch;
asciiBuf+=bit;
count++;
if(count==8)
{
ch=toChar(asciiBuf);
fout<<ch;
count=0;
asciiBuf="";
}
if(!nextReadFlag)
{
if (count !=0)
{
int emptyBits=8-count;
int remain=endSt.size()-emptyBits;
int id=0;
for(;id<emptyBits;id++)
{
if(id < endSt.size())
asciiBuf+=endSt[id];
else
asciiBuf+='0';
count++;
}
assert(count==8);
ch=toChar(asciiBuf);
fout<<ch;
asciiBuf="";
count=0;
if (remain <= 0)
return;
int extraBits=((remain/8)+1)*8;
for(;id < endSt.size();++id)
{
asciiBuf+=endSt[id];
count++;
extraBits--;
if(count==8)
{
count=0;
asciiBuf="";
}
}
while(count !=8)
{
asciiBuf+='0' ;
++count;
}
ch=toChar(asciiBuf);
fout<<ch;
}
}
}
//___________________________________makeCodedFile______________________________
void makeCodedFile(ofstream& fout,ifstream& fin,vector<string>& stArray,string format)
{
bool nextReadFlag=true;
fout.seekp(0);
fout<<dicSize;
for(vector< pair<char,int> >::iterator i=charWeight.begin();i<charWeight.end();++i)
if(i->first != CTRLCHAR)
fout<<" "<<i->first<<" "<<i->second;
string endSt=stArray[CTRLCHAR];
fout<<" "<<endSt<<" "<<format<<" ";
char * buffer=new char[1024];
fin.seekg(0);
int readSize=1024;
while(nextReadFlag)
{
nextReadFlag=bufferRead(fin,readSize,buffer);
for(int i=0 ; i < readSize ; ++i)
{
int ch=buffer[i];
if(ch<0)
ch+=256;
for(int j=0 ; j < stArray[ch].size(); ++j)
{
if(nextReadFlag || !(j == stArray[ch].size()-1 && i==readSize-1 ))
{
int tmp=stArray[ch].size();
writeBit(stArray[ch][j],true,endSt,fout);
}else
{
writeBit(stArray[ch][j],false,endSt,fout);
}
}
}
}
}
//_____________________________________bin2dec___________________________________
void dec2binHelp(int x,string & result)
{
if(x<2)
result+=(x+'0');
else
{
dec2binHelp(x/2,result);
result+=((x%2)+'0');
}
}
string dec2bin(int x)
{
static vector<string> memory(256,"");
string result="";
string lastResult="00000000";
if(memory[x]=="")
{
dec2binHelp(x,result);
for (string::iterator i=lastResult.end(),j=result.end(); j>result.begin() ;)
{
i--;
j--;
*i=*j;
}
memory[x]=lastResult;
}
return memory[x];
}
//____________________________________makeBinary_________________________________
void makeBinary(char * buffer,int size,vector<string> & codes)
{
int ascii;
for(int i=0 ; i<size ;++i)
{
ascii=buffer[i];
if(ascii<0)
ascii+=256;
codes.push_back(dec2bin(ascii));
}
}
//____________________________________encode______________________________________
void encode(char * buffer,const int & size,ofstream & fout)
{
static string ctrl="";
vector<string> codes;
const node * root=&q.top();
static const node * curPos=root;
makeBinary(buffer,size,codes);
for(vector<string>::iterator i=codes.begin(); i<codes.end() ;++i)
for(string::iterator j=i->begin(); j < i->end() ; ++j)
{
if(curPos->value==300) return;
if(*j=='0')
{
ctrl+='0';
curPos=curPos->child0;
}else
if(*j=='1')
{
ctrl+='1';
curPos=curPos->child1;
}
if(!curPos->child0)
{
if(curPos->value !=300)
{
fout<<(unsigned char)curPos->value;
curPos=root;
ctrl="";
}
}
}
}
//__________________________makeOutputName________________________________________
string makeOutputName(string file,string &format)
{
bool check=false;
string result="";
string::iterator it=file.begin();
while(*it != '.')
{
if(it == file.end()-1)
break;
result+=*it;
it++;
if(*it=='.')
check=true;
}
if(check)
{
result+=".rrm";
it++;
for(;it<file.end();++it)
format+=*it;
return result;
}
}
//____________________________makeOutforDec_______________________________________
string makeOutforDec(string fileName,string format)
{
int pos=fileName.find('.');
fileName.erase(pos);
fileName+="Copy.";
fileName+=format;
return fileName;
}
//____________________________________compress_____________________________________
void compress()
{
const int size=sizeof(char)*1024*1024;
char* buffer=new char [size];
int readSize=size;
vector<int> counts(SIZE,0);
bool repeat=true;
char * filenametmp=new char[100];
string filename;
string output,format="";
cout<<"enter file name:";
cin>>filenametmp;
filename=filenametmp;
output=makeOutputName(filename,format);
ifstream fin(filename.data(),ios::binary);
if ( !fin ) {
cerr << "Couldn't open the input file!\n";
system("pause");
throw "abort";
}
while(repeat)
{
repeat=bufferRead(fin,readSize,buffer);
count_chars( counts, readSize,buffer);
}
priority_queue< node,vector<node>, greater<node> > q;
makeTree(counts,q);
delete[] buffer;
buffer=NULL;
counts.clear();
ofstream fout(output.data(),ios::binary);
vector<string> stArray(SIZE,"");
q.top().traverse(stArray,fout);
makeCodedFile(fout,fin,stArray,format);
fin.close();
fout.close();
return;
}
//____________________________________decompress___________________________________
void decompress()
{
unsigned char temp;//testing purpose
bool repeat=true;
unsigned char chTemp;
int weight;
char formatTmp[10];
char endCtrlTmp[100];
char fileNameTmp[100];
const int size=sizeof(char)*1024*1024;
int readSize=size;
int dicSize;
char* buffer=new char [size];
string format,fileName,output="";
cout<<"Enter the rrm file name:";
cin>>fileNameTmp;
fileName=fileNameTmp;
ifstream fin(fileName.data(),ios::binary);
//ofstream fout("extracted.txt",ios::binary);
if ( !fin ) {
cerr << "Couldn't open the input file!\n";
system("pause");
throw "abort";
}
long int s=getFileSize(fin,0);
fin.close();
fin.open(fileName.data(),ios::binary);
fin>>dicSize;
while(dicSize--)
{
fin.read((char *)&chTemp,1);
fin.read((char *)&chTemp,1);
fin>>weight;
q.push(node(chTemp,weight));
}
q.push(node(300,0));
fin>>endCtrlTmp;
endCtrl=endCtrlTmp;
fin.read((char *)&chTemp,1);
fin>>formatTmp;
format=formatTmp;
fin.read((char *)&chTemp,1);
output=makeOutforDec(fileName,format);
ofstream fout(output.data(),ios::binary);
makeTree(q);
streamoff i=fin.tellg();
repeat=bufferRead(fin,readSize,buffer,i,true);
encode(buffer,readSize,fout);
while(repeat)
{
repeat=bufferRead(fin,readSize,buffer,i,false);
encode(buffer,readSize,fout);
}
//assert (s==fin.tellg());
fin.close();
fout.close();
return;
}
//_____________________________________main________________________________________________
int main()
{
bool check=true;
char ch;
while (check)
{
cout<<"Do you want to compress or decompress?(C/D)"<<endl;
cin>>ch;
if(ch=='D'|| ch=='d')
{
decompress();
check=false;
}
else
if(ch=='C' || ch=='c')
{
compress();
check=false;
}
}
system("pause");
return 0;
}