// ============================================================================
// gzstream, C++ iostream classes wrapping the zlib compression library.
// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
// ============================================================================
//
// File          : gzstream.h
// Revision      : $Revision: 1.5 $
// Revision_date : $Date: 2002/04/26 23:30:15 $
// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
// 
// Standard streambuf implementation following Nicolai Josuttis, "The 
// Standard C++ Library".
// ============================================================================

#ifndef GZSTREAM_H
#define GZSTREAM_H

// standard C++ with new header file names and std:: namespace
#include <iostream>
#include <fstream>
#include <zlib.h>
#include <vector>
#include <string>
#include <cstdlib>
using std:: cerr;
using std:: endl;

#include "utils.hh"

#ifdef GZSTREAM_NAMESPACE
namespace GZSTREAM_NAMESPACE {
#endif

// ----------------------------------------------------------------------------
// Internal classes to implement gzstream. See below for user classes.
// ----------------------------------------------------------------------------

class gzstreambuf : public std::streambuf {
private:
    static const int bufferSize = 47+256;    // size of data buff
    // totals 512 bytes under g++ for igzstream at the end.

    gzFile           file;               // file handle for compressed file
    char             buffer[bufferSize]; // data buffer
    char             opened;             // open/close state of stream
    int              mode;               // I/O mode

    int flush_buffer();
public:
    gzstreambuf() : opened(0) {
        setp( buffer, buffer + (bufferSize-1));
        setg( buffer + 4,     // beginning of putback area
              buffer + 4,     // read position
              buffer + 4);    // end position      
        // ASSERT: both input & output capabilities will not be used together
    }
    int is_open() { return opened; }
    gzstreambuf* open( const char* name, int open_mode);
    gzstreambuf* close();
    ~gzstreambuf() { close(); }
    
    virtual int     overflow( int c = EOF);
    virtual int     underflow();
    virtual int     sync();
};

// Common base of the gz stream classes. It owns the gzstreambuf and derives
// virtually from std::ios so that the std::istream / std::ostream sibling base
// of each user class shares the same ios sub-object.
class gzstreambase : virtual public std::ios {
protected:
    gzstreambuf buf;    // the stream buffer every derived stream hands to its std base
public:
    // Unopened stream: only attaches the buffer to the ios base.
    gzstreambase() { init(&buf); }
    // Attaches the buffer and opens `name` (defined elsewhere).
    gzstreambase( const char* name, int open_mode);
    // Defined elsewhere. Marked explicit so that pointers and arithmetic
    // values are never implicitly converted into a stream base through the
    // bool parameter; direct initialisation `gzstreambase(flag)` still works.
    explicit gzstreambase( bool bgen);
    ~gzstreambase();
    // Open / close the underlying compressed file (defined elsewhere).
    void open( const char* name, int open_mode);
    void close();
    // Typed access to the owned buffer; hides std::ios::rdbuf().
    gzstreambuf* rdbuf() { return &buf; }
};

// ----------------------------------------------------------------------------
// User classes. Use igzstream and ogzstream analogously to ifstream and
// ofstream respectively. They read and write files based on the gz* 
// function interface of the zlib. Files are compatible with gzip compression.
// ----------------------------------------------------------------------------

class igzstream : public gzstreambase, public std::istream {
public:
    igzstream() : std::istream( &buf) {} 
    igzstream( const char* name, int open_mode = std::ios::in)
        : gzstreambase( name, open_mode), std::istream( &buf) {}  
    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
    void open( const char* name, int open_mode = std::ios::in) {
        gzstreambase::open( name, open_mode);
    }
};

class ogzstream : public gzstreambase, public std::ostream {
public:
    ogzstream() : std::ostream( &buf) {}
    ogzstream( const char* name, int mode = std::ios::out)
        : gzstreambase( name, mode), std::ostream( &buf) {}  
    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
    void open( const char* name, int open_mode = std::ios::out) {
        gzstreambase::open( name, open_mode);
    }
};

class bgenstream : public gzstreambase, public std::istream {
public:
    bgenstream() {}
    bgenstream( const char* name) {
        unsigned char buffer[4];
        unsigned int magic;
        aA="";              //initialise string variables
        aB="";              //initialise string variables
        rsidS="";           //initialise string variables
        currentSnp=0;       //initialise currentSnp to 0 to be able to increment
        readVarHead=false;  //whether the head of thevariant has been read but not the probabilities
        input.open(name,std::ios::binary);       //open file
	if(input.fail()){
		cerr << "!" << endl;
		cerr << "! fatal error : cannot open genotype file" << endl;
		cerr << "!" << endl;
		exit(1);
	}
        input.read((char*)(&offset),4);     //read offset
        input.read((char*)(&LH),4);         //read LH
        input.read((char*)(&M),4);          //read M
        input.read((char*)(&N),4);          //read N
        input.read((char*)(&magic),4);  //read magic number
        if(!(magic==1852139362 || magic==0)){
            std::cerr<<"ERROR: Malformed bgen file. \'Magic number\' bytes not valid"<<std::endl;
            exit(-8);
        }
        // check whether there's anything in the free data area, and if so skip it
        if((LH-20)>0){
            input.seekg((LH-20),std::ios_base::cur);
        }
        //read the flags
        //Check which format the data is in
        input.read((char*)(&buffer[0]),4);
        compressed=((buffer[0] >> 0) & 1);  //CompressedSnpBlocks
        layout=((buffer[0] >> 2) & 15);     //layout
        if(!(layout==1 || layout==2)){
            std::cerr<<"ERROR: file is in bgen format "<<layout<<". Only bgen files v1.1 and v1.2 supported"<<std::endl;
            exit(-8);
        }
        sampleIds=((buffer[0]>>31) & 1);    //sampleIds
	if(sampleIds){
		read_sample_id_block();
	}
        //Header block read so skip to the start of hte genotypes
        input.seekg((offset+4),ios_base::beg);
    }
    int read_sample_id_block();
    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
    void open( const char* name, int open_mode = std::ios::in) {
        gzstreambase::open( name, open_mode);
    }
    bool read_variant_id(); //read the next variant ID
    void read_variant_probabilities();      //read the probabilities from the previously read variant
    void skip_variant_probabilities();      //skip the probabilities and go to the next variant data block
    unsigned int get_m() const {return M;}          // get M
    unsigned int get_n() const {return N;}          // get N
    std::string get_rsid() const {return rsidS;}         //get rsidS
    std::string get_vid() const {return vidS;}         //get rsidS
    unsigned int get_pos() const {return pos;}      //get pos
    std::string get_pos_string() const {return utils::to_str(pos);} // get pos as a string (hopefully)
    unsigned int cSnp() const {return currentSnp;}  //get currentSnp
    std::string get_aA() const {return aA;}              //get aA
    std::string get_aB() const {return aB;};              //get aB
    std::vector<float> get_probs() {return probs;}       // get probs
    std::vector<float> probs;    // probabilities for last read variant
    std::vector<std::string> alleleString;	//string containing allele labels (for v1.2 files with more than 2 alleles)

private:
    unsigned int offset;    //offset from start of file where genotype blocks start
    unsigned int LH;        //length in bytes of the header
    unsigned int M;         //number of SNPs
    unsigned int N;         //number of individuals
    bool compressed;        //whether the genotype data is zlib compressed
    int layout;             //layout (bgen version v1.x)
    bool sampleIds;         //sampleIDs - whether sample IDs are stored in the file
    unsigned int nv;        //number of individuals in current data block (must equal N but for some reason stored in each block too)
    unsigned int K;         //number of alleles (if layout==2, else undefined)
    unsigned int currentSnp;                //counter to keep track of which SNP has just been read
    bool readVarHead;       //bool to say whether we're part way through a variant or not
    std::vector<unsigned char> vid,rsid; //variant identifier and rsid of current variant
    std::vector<unsigned char> chr;      //chromosome identifier (not always chromosome, especially if encoded using qctool!!!!!!!!!!!!!)
    unsigned int pos;       //position of current variant
    unsigned int nbytes;    // length of compressed data block (if compressed, else uninitiated!!)
    std::vector<unsigned char> alleleB;  //alleles
    std::vector<unsigned char> alleleA;
    std::string rsidS;   //string version of rsid
    std::string vidS;    //string version if vid
    std::string aA,aB;   // string versions of the alleles
public:
    std::ifstream input; // ifstream containing opened bgen file
private:
    std::vector<std::string> sampleIdentifiers;	//vector of strings of sample Ids from sample identifier block (if present)
};

// Declaration only; the definition lives in another translation unit.
// NOTE(review): judging by the name this presumably returns the binomial
// coefficient "n choose k" -- confirm against the definition, including how
// it behaves for k > n and whether the result can overflow `unsigned`.
unsigned nChoosek(unsigned n, unsigned k);

#ifdef GZSTREAM_NAMESPACE
} // namespace GZSTREAM_NAMESPACE
#endif

#endif // GZSTREAM_H
// ============================================================================
// EOF //

