18#ifdef __ZLIB_AVAILABLE__
20#include "BgzfFileTypeRecovery.h"
// Header record exposing the fields ID1, ID2, CM, FLG, MTIME, XFL, OS and XLEN
// through simple value-returning accessors.
// NOTE(review): the field names match the fixed gzip member header of RFC 1952
// followed by XLEN - confirm against the member declarations, which are not
// visible in this listing.
// NOTE(review): the number at the start of each line looks like a listing line
// number fused into the text; the gaps between numbers mean lines are missing.
41class RecoveryGzipHeader {
// Default constructor; its member-initializer list is not visible here.
52 RecoveryGzipHeader() :
// Accessors: each returns the corresponding m_* member by value.
73 uint8_t ID1() {
return m_ID1;}
74 uint8_t ID2() {
return m_ID2;}
75 uint8_t CM() {
return m_CM;}
76 uint8_t FLG() {
return m_FLG;}
77 uint32_t MTIME() {
return m_MTIME;}
78 uint8_t XFL() {
return m_XFL;}
79 uint8_t OS() {
return m_OS;}
80 uint16_t XLEN() {
return m_XLEN;}
// Validity check: true only when every field holds exactly the value compared
// below (ID1 31, ID2 139, CM 8, FLG 4, MTIME 0, XFL 0, OS 255, XLEN 6).
// Presumably the body of sane(), which BGZFHeader invokes as
// RecoveryGzipHeader::sane() - the enclosing signature is not visible.
82 return (m_ID1==31 && m_ID2==139 && m_CM==8 && m_FLG==4 && m_MTIME==0 && m_XFL == 0 && m_OS == 255 && m_XLEN==6);
// Extends RecoveryGzipHeader with the four fields SI1, SI2, SLEN and BSIZE.
// NOTE(review): these look like the BGZF extra subfield of the SAM/BAM
// specification - confirm.
86class BGZFHeader :
public RecoveryGzipHeader {
// Tail of the constructor: each member is initialised from an identically
// named identifier. NOTE(review): the parameter list is not visible; if the
// parameters really share the members' names this works (the parameter is
// what is looked up inside the parentheses) but is easy to misread.
98 ) : m_SI1(m_SI1), m_SI2(m_SI2), m_SLEN(m_SLEN), m_BSIZE(m_BSIZE) {;}
// Accessors: each returns the corresponding m_* member by value.
99 uint8_t SI1() {
return m_SI1;}
100 uint8_t SI2() {
return m_SI2;}
101 uint16_t SLEN() {
return m_SLEN;}
102 uint16_t BSIZE() {
return m_BSIZE;}
// Validity check: the base-class check must pass, SI1 must be 'B', SI2 must be
// 'C', SLEN must be 2 and BSIZE must exceed sizeof(BGZFHeader).
104 return RecoveryGzipHeader::sane() &&
105 (m_SI1==
'B' && m_SI2==
'C' && m_SLEN==2 && m_BSIZE >
sizeof(BGZFHeader));
// Byte buffer built directly on std::vector<uint8_t> with a read cursor
// (m_startPosition). Derived classes supply readahead(), which is pure virtual.
136class PeekaheadBuffer :
public std::vector<uint8_t> {
// Offset of the next unread byte within the vector.
139 ssize_t m_startPosition;
148 ssize_t startPosition() {
return m_startPosition;}
// Compaction: once the unread remainder is smaller than one eighth of the
// vector's size, erase the already-consumed prefix.
// NOTE(review): the matching reset of m_startPosition is not visible in this
// listing - confirm it follows the erase.
158 if(dataRemaining() < (ssize_t) (std::vector<uint8_t>::size() / 8) ) {
159 erase(begin(), begin() + m_startPosition);
165 virtual ReturnCode sync();
168 virtual ~PeekaheadBuffer();
171 ssize_t dataRemaining();
// Pure virtual hook; read() below calls it with the requested byte count
// before copying.
184 virtual ReturnCode readahead(ssize_t count) = 0;
// Copies `count` bytes starting at the cursor into `buffer` and advances the
// cursor by `count`. The conditions guarding the copy and the values returned
// are not fully visible here.
188 ReturnCode read(uint8_t *buffer, ssize_t count) {
191 rc = readahead(count);
// Raw pointer to the first unread byte of the vector's storage.
194 uint8_t *src = &(*begin()) + m_startPosition;
195 uint8_t *dest = buffer;
197 memcpy(dest, src, count);
199 m_startPosition += count;
203 }
else if(rc == reSync) {
// Constructor: starts the read cursor at offset 0.
216PeekaheadBuffer::PeekaheadBuffer() : m_startPosition(0)
// Out-of-line definition of the destructor declared virtual in the class; its
// body is not visible in this listing.
220PeekaheadBuffer::~PeekaheadBuffer()
// Base-class implementation of the virtual sync(); its body is not visible in
// this listing.
224PeekaheadBuffer::ReturnCode PeekaheadBuffer::sync() {
// Returns the number of unread bytes: vector size minus the cursor offset.
229ssize_t PeekaheadBuffer::dataRemaining()
231 return std::vector<uint8_t>::size() - m_startPosition;
// PeekaheadBuffer backed by a C stdio stream (m_stream).
236class FileReader :
public PeekaheadBuffer {
241 FileReader(FILE *stream);
242 PeekaheadBuffer::ReturnCode readahead(ssize_t count);
// Returns the underlying FILE pointer.
243 FILE *stream() {
return m_stream;}
// Returns feof() of the stream, or false when no stream is attached.
244 bool eof() {
return m_stream ? feof(m_stream) : false;}
// Default constructor; its initializer list and body are not visible here.
247FileReader::FileReader()
// Constructor that stores the caller-supplied stream in m_stream.
252FileReader::FileReader(FILE *stream) : m_stream(stream)
// Destructor; body not visible. NOTE(review): confirm whether it closes
// m_stream - nothing visible in this listing calls fclose.
256FileReader::~FileReader()
// Reads from m_stream in chunks of up to 4096 bytes, appending each chunk to
// this buffer, until at least `count` unread bytes are available.
// The return statements are not visible in this listing.
268PeekaheadBuffer::ReturnCode FileReader::readahead(ssize_t count)
270 uint8_t buffer[4096];
271 while(dataRemaining() < count) {
// NOTE(review): fread returns size_t; storing it in int is safe here only
// because the request is capped at sizeof(buffer).
272 int bytesRead = fread(buffer, 1,
sizeof(buffer), m_stream);
274 if(ferror(m_stream)) {
// Hex dump of the chunk to stderr, 16 bytes per row; the condition enabling
// this dump is not visible here.
281 fprintf(stderr,
"\n\n");
283 for(
int i=0;i<bytesRead;i+=16) {
284 fprintf(stderr,
"%08x: ", i);
285 for(
int j=0;j<16;j++) {
// Looks for the byte pair 31,139 at the current offset.
// NOTE(review): j always runs to 15, so i+j can exceed bytesRead when
// bytesRead is not a multiple of 16, and buffer[i+j+1] reaches index 4096
// (one past the array) when i==4080 and j==15 - confirm and bound the loop.
286 if(buffer[i+j]==31 && buffer[i+j+1]==139) {
289 fprintf(stderr,
"%02x ", buffer[i+j]);
291 fprintf(stderr,
"\n");
294 fprintf(stderr,
"possible signature at %08x\n", possible);
// Append the bytes just read to the end of the peek-ahead vector.
297 insert(end(), &buffer[0], &buffer[0] + bytesRead);
// PeekaheadBuffer that pulls its input from an embedded FileReader
// (m_fileReader).
302class BGZFReader :
public PeekaheadBuffer {
303 FileReader m_fileReader;
307 BGZFReader(FILE *stream) : m_fileReader(stream) {;}
309 PeekaheadBuffer::ReturnCode readahead(ssize_t count);
// The statements below appear to belong to sync() (see the debug text): while
// a header's worth of bytes can be peeked, view the bytes at the cursor as a
// BGZFHeader; otherwise consume one byte and retry. The test applied to
// `header` is not visible in this listing.
318 PeekaheadBuffer::ReturnCode rc;
319 while((rc = m_fileReader.readahead(
sizeof(BGZFHeader)))==ok ) {
// NOTE(review): as listed, this test sits inside a loop that only runs while
// rc==ok, so it looks unreachable unless a line not shown reassigns rc.
321 if(rc==endOfFile)
return rc;
// Pointer to the first unread byte of the file reader's storage.
323 void *src = &(*(m_fileReader.begin())) + m_fileReader.startPosition();
324 header = (BGZFHeader *) src;
326 if(debug) std::cerr <<
"BGZFReader::sync returning reSync\n";
// Discard a single byte so the next iteration examines the following offset.
330 uint8_t throwAwayBuffer;
331 rc = m_fileReader.read(&throwAwayBuffer, 1);
// Exposes the FILE pointer held by the embedded file reader.
335 FILE *stream() {
return m_fileReader.stream();}
// End of data: nothing left unread in this buffer and the file reader reports
// end of file.
337 bool eof() {
return dataRemaining()==0 && m_fileReader.eof();}
// Reads blocks from m_fileReader, inflates each with zlib and appends the
// output to this buffer until at least `count` unread bytes are available.
// Several branches, including all return statements, are not visible here.
341PeekaheadBuffer::ReturnCode BGZFReader::readahead(ssize_t count)
// Scratch space: inflated output (64 KiB) and compressed input (64 KiB + 1).
345 uint8_t inflateBuffer[64*1024];
346 uint8_t gzipBuffer[64*1024+1];
348 while(dataRemaining() < count) {
// NOTE(review): function-local static, so the counter is shared by every
// BGZFReader instance; it is only used in the debug trace below.
349 static int loopCount = 0;
351 if(debug) std::cerr <<
"BGZFReader::readahead loopcount = " << loopCount++ <<
"\n";
// Read sizeof(header) bytes from the file reader straight into `header`.
360 PeekaheadBuffer::ReturnCode rc = m_fileReader.read((uint8_t *) (&header),
sizeof(header));
362 if(rc == endOfFile) {
370 if(debug) std::cerr <<
"BGZFReader::readahead found corrupt BGZF header - now calling sync()\n";
// Read the rest of the block: BSIZE()+1 bytes in total, less the header
// already consumed. NOTE(review): presumably BSIZE is "block size minus 1" as
// in the BGZF specification - confirm.
381 rc = m_fileReader.read((uint8_t *) &gzipBuffer, header.BSIZE() + 1 -
sizeof(header));
384 if(debug) std::cerr <<
"BGZFReader::readahead got incomplete BGZF read - now calling sync()\n";
393 if(rc == endOfFile) {
397 PeekaheadBuffer::ReturnCode bgzf_rc = ok;
// Point zlib at the compressed bytes and at the output scratch buffer.
// NOTE(review): the "- 16" is not explained by visible code - confirm it
// matches the header and trailer sizes.
406 zs.next_in = gzipBuffer;
407 zs.avail_in = header.BSIZE() - 16;
408 zs.next_out = inflateBuffer;
409 zs.avail_out =
sizeof(inflateBuffer);
// Negative windowBits asks zlib for raw deflate (no zlib/gzip wrapper).
414 if(inflateInit2(&zs, -15) != Z_OK) {
416 if(debug) std::cerr <<
"BGZFReader::readahead - inflateInit2 failed (out of memory?)\n";
// The whole block must inflate in a single Z_FINISH call.
419 if(bgzf_rc==ok && inflate(&zs, Z_FINISH) != Z_STREAM_END) {
421 if(debug) std::cerr <<
"BGZFReader::readahead - inflate failed (bad data), calling sync()\n";
425 if(inflateEnd(&zs) == Z_OK) {
// NOTE(review): this debug line goes to std::cout; the others use std::cerr.
427 if(debug) std::cout <<
"hey, got data! zs.total_out == " << zs.total_out <<
"\n";
// Append the inflated bytes to the peek-ahead vector.
430 insert(end(), &inflateBuffer[0], &inflateBuffer[0] + zs.total_out);
// NOTE(review): same text as the inflateInit2 message above; if this is the
// failure branch of inflateEnd the wording looks copy-pasted - confirm.
434 if(debug) std::cerr <<
"BGZFReader::readahead - inflateInit2 failed (out of memory?)\n";
// Fragment of a manual test: the enclosing function header and the
// declaration of `b` are not visible in this listing. It prints the unread
// byte count, requests 64 bytes of readahead, then prints the return code and
// the resulting size for eyeball comparison with the expected values.
457 std::vector<uint8_t>::iterator position;
458 BGZFReader::ReturnCode rc;
460 std::cout <<
"size = " << b.dataRemaining() <<
"\n";
469 rc = b.readahead(64);
470 std::cout <<
"rc = " << rc <<
" - expect ok (1)\n";
471 std::cout <<
"size (expect 64) = " << b.size() <<
"\n";
// Program entry point; its body is not visible in this listing.
// NOTE(review): presumably compiled only in a standalone test build, since
// this file also defines library class members - confirm the guarding macro.
475int main(
int argc,
const char **argv)
// Releases the BGZFReader owned by this object.
// NOTE(review): delete on a null pointer is a no-op, so the guard is
// redundant; the pointer reset and return value are not visible here -
// confirm bgzfReader is cleared to avoid a double delete.
483int BgzfFileTypeRecovery::close()
485 if(bgzfReader)
delete bgzfReader;
// Opens `filename` for recovery reading when `mode` starts with 'r' or 'R'
// and wraps the stream in a new BGZFReader.
491BgzfFileTypeRecovery::BgzfFileTypeRecovery(
const char * filename,
const char * mode)
493 if(tolower(mode[0])==
'r') {
// NOTE(review): mode "r" is text mode; on platforms that distinguish text and
// binary streams this can alter compressed bytes - consider "rb". There is
// also no visible check that fopen succeeded before f is handed on.
494 FILE *f = fopen(filename,
"r");
495 bgzfReader =
new BGZFReader(f);
498 if(debug) std::cerr <<
"Unable to open " << filename <<
" in mode " << mode <<
".\n";
// Deliberately unusable: the visible statement throws std::logic_error.
// `rhs` is not used by it.
506bool BgzfFileTypeRecovery::operator == (
void * rhs)
508 throw std::logic_error(
"BgzfFileTypeRecovery::operator == is dangerous - do not use");
// Deliberately unusable: the visible statement throws std::logic_error.
// `rhs` is not used by it.
512bool BgzfFileTypeRecovery::operator != (
void * rhs)
514 throw std::logic_error(
"BgzfFileTypeRecovery::operator != is dangerous - do not use");
// Forwards to BGZFReader::eof(); the bool result is returned as int.
// NOTE(review): no null check on bgzfReader is visible here, whereas read()
// tests for NULL first - confirm callers never call this on an unopened or
// closed object.
518int BgzfFileTypeRecovery::eof()
520 return bgzfReader->eof();
// Write entry point; its body is not visible in this listing.
// NOTE(review): only a reader object appears anywhere in the visible code, so
// writing is presumably unsupported - confirm what this returns.
523unsigned int BgzfFileTypeRecovery::write(
const void * buffer,
unsigned int size)
// Reads `size` bytes of decompressed data into `buffer` via the BGZFReader
// and dispatches on its return code.
// Throws std::runtime_error("BGZF stream resync") when the reader reports
// reSync. The values returned for the other cases are not visible here.
529int BgzfFileTypeRecovery::read(
void * buffer,
unsigned int size)
// Guard against use without an open reader; the action taken is not visible.
532 if(bgzfReader == NULL) {
536 PeekaheadBuffer::ReturnCode rc = bgzfReader->read((uint8_t *) buffer, size);
541 case PeekaheadBuffer::endOfFile:
544 case PeekaheadBuffer::reSync:
546 if(debug) std::cerr <<
"throwing BGZF sync exception\n";
547 throw std::runtime_error(
"BGZF stream resync");
548 case PeekaheadBuffer::ok:
// Position query; its body is not visible in this listing.
560int64_t BgzfFileTypeRecovery::tell()
// Seek request; its body is not visible in this listing.
566bool BgzfFileTypeRecovery::seek(int64_t offset,
int origin)
// Scans forward through the decompressed stream one byte at a time until the
// caller's predicate accepts the data at the cursor.
//   checkSignature - callback given a pointer to the first unread byte
//   length         - number of bytes that must be buffered before each check
// Returns true as soon as checkSignature returns true (the cursor is left at
// that position) and false if consuming a byte fails. The result when
// readahead stops returning ok is not visible in this listing.
573bool BgzfFileTypeRecovery::attemptRecoverySync(
bool (*checkSignature)(
void *data) ,
int length)
581 while( bgzfReader->readahead(length) == PeekaheadBuffer::ok) {
// Pointer to the first unread byte of the reader's storage.
583 void *src = &(*(bgzfReader->begin())) + bgzfReader->startPosition();
589 if((*checkSignature)(src))
return true;
// No match here: consume one byte (into `ch`, declared on a line not shown)
// and try the next offset.
590 PeekaheadBuffer::ReturnCode rc = bgzfReader->read((uint8_t *) &ch, 1);
591 if(rc!=PeekaheadBuffer::ok)
return false;