7#include <openssl/sha.h>
// Forward declarations for the helpers defined further down in this file.

// Scans tokenized dmrpp text for entries containing `chunk offset="` and fills
// `offsets` / `nbytes`; the definition keeps only entries whose nbytes is non-zero.
12bool obtain_offset_nbytes(
const vector<string>& str_vec, vector<size_t>& offsets, vector<size_t>& nbytes);
// Splits `in_str` on `delim` and appends each piece to `out_vec`.
14void string_tokenize(
const string &in_str,
const char delim,vector<string>&out_vec);
// Converts the text of a number to size_t (the definition feeds `str` to a stringstream).
16size_t string_to_size_t(
const string& str);
// Reads the whole dmrpp file behind the FILE* and extracts chunk offsets and byte counts.
18bool retrieve_chunk_info(FILE*,vector<size_t> &offsets,vector<size_t> &nbytes);
// Reads the byte ranges described by `offsets`/`nbytes` from the FILE* and returns
// a string built from the SHA-256 digest of those bytes.
20string retrieve_data_sha256(FILE*,
const vector<size_t> &offsets,
const vector<size_t> &nbytes);
// Appends one "<h5 path> <dmrpp path> <sha256>" record to the sha256 text file.
22short write_sha256_file(
char* m_dmrpp_fname,
char* m_h5_fname,
char* m_sha256_fname,
const string & sha256_buf);
// Looks for `sha256_buf` among the existing records of the sha256 text file and
// either reuses the matching record or appends a new one.
// NOTE(review): the fourth parameter is named `stored_fname` here but
// `store_h5_fname` in the definition; harmless to the compiler, confusing to readers.
24short update_sha256_file(
char* m_dmrpp_fname,
char* m_h5_fname,
char* m_sha256_fname,
char*stored_fname,
const string & sha256_buf);
// Helper used by retrieve_data_sha256(): it is called once per digest byte and
// its result is appended to the output string.
// NOTE(review): the body is not visible in this view; presumably it returns the
// hexadecimal text for the single byte `s` -- confirm against the full file.
26string to_hex(
unsigned char s) {
// Program entry point.
//
// Arguments, as described by the usage text below:
//   argv[1]  dmrpp file that contains the missing variable value information
//   argv[2]  HDF5 file that stores the missing variable values
//   argv[3]  text file that stores file paths and sha256 values
//   argv[4]  text file that stores the final HDF5 file path for this dmrpp file
//
// Visible flow: read the chunk offsets/sizes from the dmrpp file, hash those
// byte ranges of the HDF5 file, then pass the digest to update_sha256_file().
// NOTE(review): the argument-count check, the early returns and the fclose()
// calls are not visible in this view; confirm they exist in the full file.
34int main(
int argc,
char **argv ) {
// Usage text naming the four expected command-line arguments.
37 cout<<
"Please provide four arguments: "<< endl;
38 cout<<
" The first is the dmrpp file that contains the missing variable value information. "<<endl;
39 cout<<
" The second is the hdf5 file path that stores the missing variable values. "<<endl;
40 cout<<
" The third is the text file that stores the file path and the sha256 value." <<endl;
41 cout<<
" The fourth is the text file that stores the final HDF5 file path for this dmrpp file. "<<endl;
// Open the dmrpp file for reading; a null handle is reported as a missing file.
45 FILE* fp_dmrpp = fopen(argv[1],
"r");
46 if(fp_dmrpp == NULL) {
47 cout<<
"The dmrpp file doesn't exist"<<endl;
// Collect the chunk offsets and byte counts recorded in the dmrpp file.
51 vector<size_t>offsets;
53 bool ret_chunk = retrieve_chunk_info(fp_dmrpp,offsets,nbytes);
54 if(
false == ret_chunk) {
55 cout<<
"Cannot retrieve the chunk info from the dmrpp file successfully. "<<endl;
// Open the HDF5 file whose chunk bytes will be hashed.
// NOTE(review): opened in text mode ("r") although HDF5 is a binary format;
// on platforms that translate line endings "rb" would be needed -- confirm.
61 FILE* fp_h5 = fopen(argv[2],
"r");
63 cout<<
"The HDF5 file doesn't exist"<<endl;
// Hash the chunk byte ranges of the HDF5 file.
67 string sha256_buf = retrieve_data_sha256(fp_h5,offsets,nbytes);
// NOTE(review): the condition guarding this message is not visible here;
// presumably it fires when sha256_buf comes back empty -- confirm.
69 cout<<
"The sha256 of this file doesn't exist"<<endl;
// Record the digest, or reuse an existing record with the same digest.
75 short ret_value = update_sha256_file(argv[1],argv[2],argv[3],argv[4],sha256_buf);
// Appends one record to the sha256 text file.
//
// The record has the form "<m_h5_fname> <m_dmrpp_fname> <sha256_buf>\n"
// (single spaces as separators, newline-terminated).
//
// m_dmrpp_fname   path of the dmrpp file, written as the second field
// m_h5_fname      path of the HDF5 file, written as the first field
// m_sha256_fname  path of the text file opened in append mode
// sha256_buf      digest string written as the third field
//
// Returns a short status code. `sha_fname_ret` starts at 1; the statements that
// change and return it are not visible in this view, so the exact codes are
// unconfirmed.
81short write_sha256_file(
char* m_dmrpp_fname,
char* m_h5_fname,
char* m_sha256_fname,
const string & sha256_buf) {
83 short sha_fname_ret = 1;
// NOTE(review): the fopen() result is passed to fwrite() below without a null
// check; if the file cannot be opened this passes a null FILE*.
84 FILE*fp = fopen(m_sha256_fname,
"a");
85 string fname_str(m_h5_fname);
86 string dname_str(m_dmrpp_fname);
// Build the record: HDF5 path, dmrpp path, digest, newline.
87 string file_content = fname_str +
' '+dname_str+
' '+sha256_buf+
'\n';
// The record is copied into a char buffer before writing.
88 vector<char>buf(file_content.begin(),file_content.end());
89 size_t fsize = fwrite(buf.data(),1,file_content.size(),fp);
// A short write (fewer bytes written than the record length) is detected here;
// the handling itself is outside this view.
90 if(fsize != file_content.size())
// Checks whether `sha256_buf` is already recorded in the sha256 text file and
// updates the bookkeeping files accordingly.
//
// Visible behaviour:
//   * If the sha256 file cannot be opened for reading, the record is simply
//     written via write_sha256_file().
//   * Otherwise each line is split at its first two separators into
//     "<h5 path> <dmrpp path> <sha256>". A line missing either separator marks
//     the file as malformed (`space_fname_ret = false`).
//   * When a line's digest equals `sha256_buf`, no new record is needed and
//     "<existing h5 path> <existing dmrpp path>" is appended to `store_h5_fname`.
//   * After the loop, a new record is appended only when the file was well
//     formed and no matching digest was found.
//
// m_dmrpp_fname   dmrpp path for a new record
// m_h5_fname      HDF5 path for a new record
// m_sha256_fname  text file holding the existing records
// store_h5_fname  text file that receives the already-stored paths on a match
// sha256_buf      digest to look up
//
// Returns a short status code; the assignments and returns other than the
// write_sha256_file() results are not visible in this view.
99short update_sha256_file(
char* m_dmrpp_fname,
char* m_h5_fname,
char* m_sha256_fname,
char* store_h5_fname,
const string & sha256_buf) {
120 ifstream sha_fstream;
121 sha_fstream.open(m_sha256_fname,ifstream::in);
// No readable sha256 file yet: write the first record and return its status.
126 if (!sha_fstream.is_open()){
127 return write_sha256_file(m_dmrpp_fname,m_h5_fname,m_sha256_fname,sha256_buf);
// space_fname_ret: every line so far had both separators.
// need_add_sha256: no existing record carries the same digest.
135 bool space_fname_ret =
true;
136 bool need_add_sha256 =
true;
// NOTE(review): `sha_line` and `space_char` are declared outside this view;
// `space_char` is presumably ' ', matching the writer -- confirm.
138 while(getline(sha_fstream,sha_line)) {
// End of the first field (HDF5 path).
140 size_t fname_epos = sha_line.find(space_char);
141 if(fname_epos==string::npos) {
142 space_fname_ret =
false;
// End of the second field (dmrpp path).
146 size_t dname_epos = sha_line.find(space_char,fname_epos+1);
147 if(dname_epos==string::npos) {
148 space_fname_ret =
false;
// Everything after the second separator is the stored digest.
152 string f_sha256_buf = sha_line.substr(dname_epos+1);
153 if(f_sha256_buf == sha256_buf) {
155 need_add_sha256 =
false;
157 string exist_m_h5_name = sha_line.substr(0,fname_epos);
158 string exist_m_dmrpp_name = sha_line.substr(fname_epos+1,dname_epos-fname_epos-1);
// Append the paths of the matching record to the store file.
// NOTE(review): the fopen() result is used by fwrite() without a null check,
// and the written text has no trailing newline.
161 FILE*fp = fopen(store_h5_fname,
"a");
162 string file_content = exist_m_h5_name +
' '+exist_m_dmrpp_name;
163 vector<char>buf(file_content.begin(),file_content.end());
164 size_t fsize = fwrite(buf.data(),1,file_content.size(),fp);
// Short-write detection; the handling itself is outside this view.
165 if(fsize != file_content.size())
// Post-loop status handling for a malformed file or an already-present digest
// (the bodies of these two branches are outside this view).
174 if(
false == space_fname_ret)
176 if(
false == need_add_sha256)
// Well-formed file and no matching digest: append the new record.
180 if(
true == space_fname_ret) {
181 if(
true == need_add_sha256) {
182 ret_value = write_sha256_file(m_dmrpp_fname,m_h5_fname,m_sha256_fname,sha256_buf);
// Reads the byte ranges described by `offsets`/`nbytes` from `fp`, concatenates
// them in order into one buffer, and computes the SHA-256 digest of that buffer.
//
// fp       open file positioned with fseek() for each range
// offsets  absolute start position of each range (used with SEEK_SET)
// nbytes   length in bytes of each range; indexed in parallel with `offsets`
//
// Returns the string built by appending to_hex() of each digest byte.
// NOTE(review): the return statements and error paths are outside this view.
190string retrieve_data_sha256(FILE*fp,
const vector<size_t> &offsets,
const vector<size_t> &nbytes){
194 unsigned char hash[SHA256_DIGEST_LENGTH];
// NOTE(review): the loop body is outside this view; presumably it sums the
// nbytes entries into `fSize`, the total buffer length -- confirm.
197 for(
size_t i = 0; i <nbytes.size();i++)
// Copy each range into `buf` back to back; `cur_size` is the write position.
205 for(
size_t i = 0; i<offsets.size();i++) {
207 if(fseek(fp,offsets[i],SEEK_SET)!=0)
// NOTE(review): the fread() return value is ignored, so a short read leaves
// part of `buf` unfilled yet still included in the digest.
209 fread(&buf[cur_size],1,nbytes[i],fp);
210 cur_size +=nbytes[i];
// One-shot SHA-256 over the first `fSize` bytes of the buffer.
214 SHA256((
const unsigned char*)buf.data(),fSize,hash);
// Convert the digest byte by byte into the output string.
219 for(
int i =0; i<SHA256_DIGEST_LENGTH;i++)
220 output+=to_hex(hash[i]);
// Loads the entire dmrpp file into memory, tokenizes it, and extracts the chunk
// offsets and byte counts.
//
// fp       open dmrpp file; it is seeked to the end and back to the start
// offsets  output: chunk offsets appended by obtain_offset_nbytes()
// nbytes   output: chunk byte counts appended by obtain_offset_nbytes()
//
// Returns the result of obtain_offset_nbytes() on the visible final path.
// NOTE(review): the early-return paths for failed seeks or reads are outside
// this view.
226bool retrieve_chunk_info(FILE*fp,vector<size_t> &offsets,vector<size_t> &nbytes) {
// Seek to the end of the file. NOTE(review): `fSize` is set outside this view,
// presumably from ftell() at this point -- confirm.
231 if(fseek(fp,0,SEEK_END)!=0)
// Rewind before reading.
240 if(fseek(fp,0,SEEK_SET)!=0)
244 buf.resize((
size_t)fSize);
// Read the whole file in one call; `result` holds the number of bytes read.
245 size_t result = fread(buf.data(),1,fSize,fp);
249 string str(buf.begin(),buf.end());
// Split the file text on `delim` (its value is declared outside this view).
251 vector<string> str_vec;
252 string_tokenize(str,delim,str_vec);
254 bool get_offset_nbytes = obtain_offset_nbytes(str_vec,offsets,nbytes);
255 if(
false == get_offset_nbytes) {
256 cout<<
"cannot successfully obtain the offset and nbytes. \n";
// Print every offset/nbytes pair.
// NOTE(review): `int i` is compared against the unsigned offsets.size().
261 for (
int i = 0; i <offsets.size();i++) {
262 cout<<
"offset["<<i<<
"]= " <<offsets[i] <<endl;
263 cout<<
"nbyte["<<i<<
"]= " <<nbytes[i] <<endl;
267 return get_offset_nbytes;
// Extracts chunk offsets and byte counts from tokenized dmrpp text.
//
// Only tokens containing `chunk offset="` are considered. From each one the
// text after `chunk offset="` up to the next `delim3` is parsed as the offset,
// and the text after `nBytes="` up to the next `delim3` as the byte count.
// Entries whose byte count is zero are dropped; the rest are appended to the
// output vectors in their original order.
//
// str_vec  tokens of the dmrpp file
// offsets  output: offsets of the chunks with non-zero size (appended)
// nbytes   output: sizes of those chunks (appended, parallel to `offsets`)
//
// Returns a bool status. The return statements are outside this view;
// presumably false is returned when a closing delimiter is missing -- confirm.
275bool obtain_offset_nbytes(
const vector<string>& str_vec, vector<size_t>& offsets, vector<size_t>& nbytes){
278 vector<string>chunk_info_str;
279 string delim1 =
"chunk offset=\"";
280 string delim2 =
"nBytes=\"";
// NOTE(review): `delim3`, used below, is declared outside this view; presumably
// it is the closing double quote -- confirm.
// Values before the zero-size filter is applied.
283 vector<size_t> unfiltered_offsets;
284 vector<size_t> unfiltered_nbytes;
// Keep only the tokens that carry a chunk offset attribute.
287 for(
size_t i = 0; i <str_vec.size(); i++)
288 if(str_vec[i].find(delim1)!=string::npos)
289 chunk_info_str.push_back(str_vec[i]);
292 for(
size_t i = 0; i<chunk_info_str.size();i++) {
// Locate the offset value between delim1 and the following delim3.
293 size_t co_spos = chunk_info_str[i].find(delim1);
294 size_t co_epos = chunk_info_str[i].find(delim3,co_spos+delim1.size());
295 if(co_epos==string::npos) {
299 string temp_offset=chunk_info_str[i].substr(co_spos+delim1.size(),co_epos-co_spos-delim1.size());
300 unfiltered_offsets.push_back(string_to_size_t(temp_offset));
// Locate the nBytes value, searching from the end of the offset value.
// NOTE(review): nb_spos is not checked against npos before use; if `nBytes="`
// is absent, nb_spos+delim2.size() wraps around and the search restarts near
// the beginning of the token.
302 size_t nb_spos = chunk_info_str[i].find(delim2,co_epos);
303 size_t nb_epos = chunk_info_str[i].find(delim3,nb_spos+delim2.size());
304 if(nb_epos==string::npos) {
308 string temp_nbyte=chunk_info_str[i].substr(nb_spos+delim2.size(),nb_epos-nb_spos-delim2.size());
309 unfiltered_nbytes.push_back(string_to_size_t(temp_nbyte));
// Drop zero-length chunks; keep the remaining offset/size pairs aligned.
316 for(
size_t i = 0; i<unfiltered_nbytes.size();i++) {
317 if(unfiltered_nbytes[i] != 0) {
318 offsets.push_back(unfiltered_offsets[i]);
319 nbytes.push_back(unfiltered_nbytes[i]);
// Splits `in_str` into pieces separated by `delim`.
//
// in_str   text to split
// delim    single-character separator passed to getline()
// out_vec  output: each piece is appended in order; existing elements are kept,
//          since the visible code only calls push_back
328void string_tokenize(
const string &in_str,
const char delim,vector<string>&out_vec) {
329 stringstream ss_str(in_str);
// `temp_str` (declared outside this view) receives one piece per iteration.
331 while (getline(ss_str,temp_str,delim)) {
332 out_vec.push_back(temp_str);
337size_t string_to_size_t(
const string& str) {
338 stringstream sstream(str);