All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
zip.cc
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <assert.h>
3 #include <algorithm>
4 #include <map>
5 #include <iostream>
6 #include <fstream>
7 #include <sstream>
8 #include <string>
9 #include "boost/iostreams/slice.hpp"
10 #include "boost/iostreams/copy.hpp"
11 #include "boost/iostreams/filter/zlib.hpp"
12 #include "boost/iostreams/filtering_stream.hpp"
13 
14 
15 namespace io = boost::iostreams;
16 
17 using namespace std;
18 
namespace {
  /// Read one unsigned integer of type T from @p is, least-significant byte
  /// first. Only the explicit specializations below are defined.
  ///
  /// A failed read is reported through the stream state only: the missing
  /// bytes are returned as zero, so callers should test the stream when a
  /// zero result could be mistaken for valid data.
  template <typename T> T get(std::istream& is);

  /// Build an integer of type I from two consecutive reads of the half-width
  /// type J. The first value read becomes the low-order half.
  template <typename I, typename J> I _get(std::istream& is) {
    const I low( get<J>(is) );
    const I high( get<J>(is) );
    return static_cast<I>( low | ( high << (8*sizeof(J)) ) );
  }

  /// Single-byte read: the base case every wider read is built on.
  template <> uint8_t get<uint8_t>(std::istream& is) {
    // Zero-initialised so that a failed read yields a defined value instead
    // of an indeterminate one; read() leaves the buffer untouched when no
    // byte could be extracted.
    uint8_t x = 0;
    is.read( reinterpret_cast<char*>(&x), sizeof(x) );
    return x;
  }
  template <> uint16_t get<uint16_t>(std::istream& is) { return _get<uint16_t, uint8_t>(is); }
  template <> uint32_t get<uint32_t>(std::istream& is) { return _get<uint32_t,uint16_t>(is); }
  template <> uint64_t get<uint64_t>(std::istream& is) { return _get<uint64_t,uint32_t>(is); }
}
36 
/// Per-member record filled in by ZipFile::index() from one central-directory
/// file header and stored in the member index keyed by name.
struct ZipInfo {
  uint16_t    compress;     ///< compression method; dump() handles 0 (copied as is) and 8 (inflated)
  uint16_t    time;         ///< 16-bit field read right after the method; presumably the DOS modification time
  uint16_t    date;         ///< 16-bit field read after time; presumably the DOS modification date
  uint32_t    crc;          ///< checksum field as read; only printed, never verified in this file
  uint32_t    data_size;    ///< number of bytes of member data stored in the archive
  uint32_t    file_size;    ///< size field read after data_size; presumably the uncompressed size
  uint32_t    file_offset;  ///< position of the member's local header, already shifted by the archive offset
  std::string name;         ///< member name exactly as stored in the central directory
};
47 
48 class ZipFile {
49 public:
50  ZipFile(const string& name)
51  : m_name( name )
52  , m_file( name.c_str(), ios::in | ios::binary )
53  {
54  index();
55  }
56 
57  bool dump(const std::string& fname, ostream& os) {
58  map<string,ZipInfo>::const_iterator i = m_index.find(fname);
59  if ( i == m_index.end() ) return false;
60  cout << " found " << fname <<endl;
61  cout << " compress = " << i->second.compress << endl;
62  cout << " crc = " << i->second.crc << endl;
63  cout << " data_size = " << i->second.data_size << endl;
64  cout << " file_size = " << i->second.file_size << endl;
65  cout << " file_offset = " << i->second.file_offset << endl;
66 
67  m_file.seekg( i->second.file_offset,ios::beg);
68  uint32_t magic = get<uint32_t>(m_file);
69  if (magic!=0x04034B50) {
70  cout << "bad magic " << hex << magic << endl;
71  return false;
72  }
73  m_file.seekg(22,ios::cur);
74  uint16_t fnamsize = get<uint16_t>(m_file) ;
75  cout << " got fnamsize " << fnamsize << endl;
76  uint16_t extrasize= get<uint16_t>(m_file); // fname len + extra len
77  cout << " got extrasize " << extrasize << endl;
78  m_file.seekg( 0, ios::beg );
79  uint32_t offset = i->second.file_offset+30+fnamsize+extrasize;
80  if (i->second.compress == 0 ) {
81  cout << "creating slice at " << offset << " + " << i->second.data_size << endl;
82  io::copy( io::slice( m_file,offset,i->second.data_size), os );
83  return true;
84  } else if (i->second.compress == 8) {
85  io::zlib_params params; params.noheader = true;
86  io::filtering_istream in;
87  in.push(io::zlib_decompressor(params));
88  cout << "creating slice at " << offset << " + " << i->second.data_size << endl;
89  in.push(io::slice(m_file, offset,i->second.data_size));
90  io::copy(in,os);
91 
92  /*
93  std::stringstream data;
94  io::copy( io::slice( m_file,i->second.file_offset+30+fnamsize+extrasize,i->second.data_size), data );
95  data << 'Z' ; // data <<'\0';
96  io::filtering_istream in;
97  in.push(io::gzip_decompressor());
98  in.push(data);
99  io::copy( in, os ) ;
100  */
101  return true;
102  }
103  return false;
104  }
105 
106 private:
107 
108  long getlong(const unsigned char* buf) {
109  long l = (long)buf[0];
110  l |= (long)buf[1] << 8;
111  l |= (long)buf[2] << 16;
112  l |= (long)buf[3] << 24;
113  return l;
114  }
115 
116  void index() {
117  m_file.seekg(0,ios::end);
118  m_file.seekg(-22,ios::end);
119  std::ios::streamoff header_position = m_file.tellg();
120  char endof_central_dir[22];
121  m_file.read( endof_central_dir,sizeof(endof_central_dir));
122  if (getlong((unsigned char *)endof_central_dir) != 0x06054B50) {
123  cout <<"wrong magic: " << hex << getlong((unsigned char *)endof_central_dir)<<endl;
124  }
125  uint32_t header_size = getlong((unsigned char *)endof_central_dir + 12);
126  uint32_t header_offset = getlong((unsigned char *)endof_central_dir + 16);
127  uint32_t arc_offset = header_position - header_offset - header_size;
128  header_offset += arc_offset;
129 
130  /* Start of Central Directory */
131  for (;;) {
132  ZipInfo info;
133  m_file.seekg( header_offset, std::ios::beg);
134  uint32_t magic = get<uint32_t>(m_file);
135  if (magic != 0x02014B50) break; /* Bad: Central Dir File Header */
136  m_file.seekg( header_offset + 10, std::ios::beg);
137  info.compress = get<uint16_t>(m_file);
138  info.time = get<uint16_t>(m_file);
139  info.date = get<uint16_t>(m_file);
140  info.crc = get<uint32_t>(m_file);
141  info.data_size = get<uint32_t>(m_file);
142  info.file_size = get<uint32_t>(m_file);
143  uint16_t name_size = get<uint16_t>(m_file);
144  header_size = 46 + name_size + get<uint16_t>(m_file) + get<uint16_t>(m_file);
145  m_file.seekg( header_offset + 42, std::ios::beg);
146  info.file_offset = get<uint32_t>(m_file) + arc_offset;
147  std::stringstream fname;
148  io::copy(io::slice(m_file,0,name_size),fname );
149  info.name = fname.str();
150  header_offset += header_size;
151  m_index[ info.name ] = info;
152  }
153  cout << "got " << m_index.size() << "entries " << endl;
154  }
155 
156  string m_name;
157  mutable fstream m_file;
158  map<string,ZipInfo> m_index;
159 };
160 
161 int main(int argc, char **argv)
162 {
163  if (argc<2) {
164  cerr << "Must give at least filename" << endl;
165  ::exit(1);
166  }
167  ZipFile file(argv[1]);
168  int i = 1;
169  while (++i<argc) {
170  cout << "================== " << argv[i] << "==================" << endl;
171  if (! file.dump( string(argv[i]), cout ) ) {
172  cout << " NOT FOUND! " << endl;
173  }
174  }
175 }
string m_name
Definition: zip.cc:156
GAUDI_API long argc()
Number of arguments passed on the command line (same as numCmdLineArgs()); provided to mirror the argv() call.
Definition: System.cpp:526
int main(int argc, char **argv)
Definition: zip.cc:161
def _get
Trivial function to access the data in TES.
Definition: GaudiAlgs.py:376
list argv
Definition: gaudirun.py:192
map< string, ZipInfo > m_index
Definition: zip.cc:158
string name
Definition: zip.cc:45
Definition: zip.cc:37
uint32_t file_size
Definition: zip.cc:43
uint16_t compress
Definition: zip.cc:38
Definition: zip.cc:48
list file
Definition: ana.py:160
uint16_t date
Definition: zip.cc:40
uint32_t data_size
Definition: zip.cc:42
tuple end
Definition: IOTest.py:101
dictionary l
Definition: gaudirun.py:365
ZipFile(const string &name)
Definition: zip.cc:50
uint32_t crc
Definition: zip.cc:41
bool dump(const std::string &fname, ostream &os)
Definition: zip.cc:57
long getlong(const unsigned char *buf)
Definition: zip.cc:108
fstream m_file
Definition: zip.cc:157
uint32_t file_offset
Definition: zip.cc:44
list i
Definition: ana.py:128
uint16_t time
Definition: zip.cc:39
void index()
Definition: zip.cc:116