// Generates a checksum for the contents of a source file.
//
// This is a very basic custom algorithm (NOT a cryptographic hash). The input is
// normalised before hashing so that purely cosmetic edits do not change the result:
//   - C++ comments (both "//..." and "/*...*/") are dropped,
//   - control characters (<= 31, which includes CR and LF) are dropped,
//   - DEL and every byte outside 7-bit ASCII (>= 127) are dropped.
// String literals are not recognised, so a comment marker inside a literal is
// treated as the start of a comment.
//
// data:    raw file contents.
// returns: the result of BitStream::Str2HexStr() applied to the 16-byte checksum.
string GetCheckSum(string data)
{
    const size_t CS_Size = 16;            // checksum size in bytes (converted to a hex string at the end)
    const size_t CS_SizeD = CS_Size * 2;  // two rows; the work buffer is always a multiple of this

    // ---- Step 1: strip comments and ignored characters ----
    const size_t len = data.size();
    string workdata;
    workdata.reserve(len);
    for (size_t i = 0; i < len; i++)
    {
        const bool hasNext = (i + 1 < len);

        if (hasNext && data[i] == '/' && data[i + 1] == '/')
        {
            // Single-line comment: skip everything up to the line terminator.
            while (i < len && data[i] != '\n' && data[i] != '\r')
            {
                i++;
            }
            continue; // the loop increment steps over the terminator, which would be dropped anyway
        }

        if (hasNext && data[i] == '/' && data[i + 1] == '*')
        {
            // Multi-line comment: search for the terminator AFTER the opening "/*",
            // so that "/*/" is not mistaken for a complete comment.
            const size_t end = data.find("*/", i + 2);
            if (end == string::npos)
            {
                break; // unterminated comment: the rest of the input is comment text
            }
            i = end + 1; // land on the closing '/', the loop increment moves past it
            continue;
        }

        // Compare as unsigned so the result does not depend on whether plain char is signed.
        const unsigned char c = static_cast<unsigned char>(data[i]);
        if (c >= 127 || c <= 31)
        {
            continue; // extended ASCII / DEL / control characters
        }
        workdata.append(1, data[i]);
    }

    // ---- Step 2: pad to a whole number of double rows ----
    // The padding content does not matter, it only has to be deterministic.
    // Short (or empty) input is filled with spaces up to one double row...
    if (workdata.size() < CS_SizeD)
    {
        workdata.resize(CS_SizeD, ' ');
    }
    // ...longer input is rounded up to the next multiple of CS_SizeD by repeating its
    // own leading bytes (fewer than CS_SizeD of them, so the index is always valid).
    const size_t remainder = workdata.size() % CS_SizeD;
    if (remainder != 0)
    {
        const size_t padCount = CS_SizeD - remainder;
        for (size_t i = 0; i < padCount; i++)
        {
            workdata.append(1, workdata[i]);
        }
    }

    // ---- Step 3: fold the rows into the checksum ----
    // The buffer is viewed as rows of CS_Size bytes. Each pass mixes two adjacent rows
    // into the checksum and then advances by ONE row, so consecutive passes overlap.
    // The loop stops once fewer than two full rows remain; a pass with a missing
    // second row would read past the end of the data.
    // The arithmetic below is arbitrary; it only has to make the result sensitive to
    // small changes in the input.
    const unsigned int rowLen = static_cast<unsigned int>(CS_Size);
    string checksum = workdata.substr(0, CS_Size); // start from the first row
    unsigned int pr1 = 0;                          // running value carried from row to row
    for (size_t row1 = 0; row1 + CS_SizeD <= workdata.size(); row1 += CS_Size)
    {
        const size_t row2 = row1 + CS_Size;
        for (unsigned int i = 0; i < rowLen; i++)
        {
            // Mirrored index, running from CS_Size down to 1.
            const unsigned int i2 = rowLen - i;

            unsigned int tmp1 = static_cast<unsigned char>(checksum[i]);
            const unsigned int tmp2 = static_cast<unsigned char>(workdata[row1 + i]);
            const unsigned int tmp3 = static_cast<unsigned char>(workdata[row2 + i]);

            // For i == 0 the mirrored index is one past the end of the row and contributes 0
            // (the same value as the terminator of a CS_Size-byte std::string).
            if (i2 < rowLen)
            {
                pr1 += static_cast<unsigned char>(workdata[row1 + i2]);
            }

            // i2 + 1 is in [2, CS_Size + 1], so the division is always well defined.
            tmp1 = (tmp1 + 1) * (tmp2 ^ pr1) / (i2 + 1) + i + tmp3 / 3;
            checksum[i] = static_cast<char>(tmp1 % 256);
        }
        pr1 = pr1 / rowLen;
    }

    return BitStream::Str2HexStr(checksum, 0, false);
}