// ============================================================================
// gzstream, C++ iostream classes wrapping the zlib compression library.
// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
// ============================================================================
//
// File          : gzstream.C
// Revision      : $Revision: 1.7 $
// Revision_date : $Date: 2003/01/08 14:41:27 $
// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
// 
// Standard streambuf implementation following Nicolai Josuttis, "The 
// Standard C++ Library".
// ============================================================================
#include "macros.hh"

#include "gzstream.h"
#include <iostream>
#include <string.h>  // for memcpy
#include <stdio.h>
#include <stdlib.h>
#include <cmath>
#include <cstdlib>

#ifdef GZSTREAM_NAMESPACE
namespace GZSTREAM_NAMESPACE {
#endif

// ----------------------------------------------------------------------------
// Internal classes to implement gzstream. See header file for user classes.
// ----------------------------------------------------------------------------

// --------------------------------------
// class gzstreambuf:
// --------------------------------------

// Opens the gzip file `name` for reading or writing.
//
// `open_mode` is a combination of std::ios open-mode flags. It is stored in
// `mode` before it is validated. Append / at-end modes and combined
// read+write mode are rejected.
//
// Returns `this` on success, or a null pointer when the buffer is already
// open, the mode is rejected, or gzopen() fails.
gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
    gzstreambuf* const failure = static_cast<gzstreambuf*>( 0);

    if ( is_open())
        return failure;

    mode = open_mode;
    const bool reading = ( mode & std::ios::in) != 0;
    const bool writing = ( mode & std::ios::out) != 0;

    // no append nor read/write mode
    if ( mode & std::ios::ate)
        return failure;
    if ( mode & std::ios::app)
        return failure;
    if ( reading && writing)
        return failure;

    // Build the gzopen() mode string: "rb", "wb", or just "b" when neither
    // the in nor the out flag is set.
    char fmode[3] = { 'b', '\0', '\0' };
    if ( reading) {
        fmode[0] = 'r';
        fmode[1] = 'b';
    } else if ( writing) {
        fmode[0] = 'w';
        fmode[1] = 'b';
    }

    file = gzopen( name, fmode);
    if ( file == 0)
        return failure;

    opened = 1;
    return this;
}

// Flushes pending output via sync(), marks the buffer as closed and closes
// the underlying gz file.
//
// Returns `this` when the buffer was open and gzclose() reported Z_OK,
// otherwise a null pointer.
gzstreambuf * gzstreambuf::close() {
    gzstreambuf* outcome = static_cast<gzstreambuf*>( 0);
    if ( ! is_open())
        return outcome;

    sync();
    opened = 0;
    if ( gzclose( file) == Z_OK)
        outcome = this;
    return outcome;
}

// Refills the get area from the gz file (used for the input buffer only).
//
// Up to four already-consumed characters are preserved in front of the newly
// read data so that they can still be put back.
//
// Returns the next available character as an unsigned char value, or EOF
// when the buffer is not open for reading or gzread() delivers no data
// (error or end of file).
int gzstreambuf::underflow() { // used for input buffer only
    if ( gptr() && ( gptr() < egptr()))
        return * reinterpret_cast<unsigned char *>( gptr());

    if ( ! (mode & std::ios::in) || ! opened)
        return EOF;
    // Josuttis' implementation of inbuf
    int n_putback = static_cast<int>( gptr() - eback());
    if ( n_putback > 4)
        n_putback = 4;
    // memmove rather than memcpy: source and destination both live inside
    // `buffer` and can overlap when the previous refill delivered fewer
    // characters than are being kept for putback, which memcpy does not allow.
    if ( n_putback > 0)
        memmove( buffer + (4 - n_putback), gptr() - n_putback, n_putback);

    int num = gzread( file, buffer+4, bufferSize-4);
    if (num <= 0) // ERROR or EOF
        return EOF;

    // reset buffer pointers
    setg( buffer + (4 - n_putback),   // beginning of putback area
          buffer + 4,                 // read position
          buffer + 4 + num);          // end of buffer

    // return next character
    return * reinterpret_cast<unsigned char *>( gptr());
}

// Writes the pending contents of the put area to the gz file. Kept separate
// so that both overflow() and sync() can share it.
//
// Returns the number of bytes written, or EOF when gzwrite() did not accept
// all of them (in which case the put pointer is left untouched).
int gzstreambuf::flush_buffer() {
    const int pending = pptr() - pbase();
    const int written = gzwrite( file, pbase(), pending);
    if ( written == pending) {
        pbump( -pending);   // rewind the put pointer to the start of the area
        return pending;
    }
    return EOF;
}

// Called when the put area is full (used for the output buffer only).
//
// Stores `c` (unless it is EOF) at the put pointer and flushes the buffer to
// the gz file. Returns `c` on success, or EOF when the buffer is not open for
// writing or the flush fails.
int gzstreambuf::overflow( int c) { // used for output buffer only
    const bool writable = ( mode & std::ios::out) && opened;
    if ( ! writable)
        return EOF;

    if ( c != EOF) {
        *pptr() = c;
        pbump( 1);
    }

    return ( flush_buffer() == EOF) ? EOF : c;
}

// Flushes any characters waiting in the put area.
//
// Uses flush_buffer() rather than overflow( EOF): the latter caused improper
// behavior with std::endl and flush() (bug reported by Vincent Ricard).
//
// Returns 0 on success (including when nothing is pending), -1 when the
// flush fails.
int gzstreambuf::sync() {
    const bool has_pending = pptr() && pptr() > pbase();
    if ( has_pending && flush_buffer() == EOF)
        return -1;
    return 0;
}

// --------------------------------------
// class gzstreambase:
// --------------------------------------

// Attaches the embedded gzstreambuf to this stream and immediately tries to
// open `name` with the given open mode; a failed open sets badbit (see open()).
gzstreambase::gzstreambase( const char* name, int mode) {
    this->init( &buf);
    this->open( name, mode);
}

// Closes the underlying buffer; the result of the close is deliberately
// ignored because a destructor has no way to report it.
gzstreambase::~gzstreambase() {
    static_cast<void>( buf.close());
}

void gzstreambase::open( const char* name, int open_mode) {
    if ( ! buf.open( name, open_mode))
        clear( rdstate() | std::ios::badbit);
}

// Closes the embedded buffer if it is open. A failed close adds badbit to the
// stream state; closing a stream that is not open does nothing.
void gzstreambase::close() {
    const bool close_failed = buf.is_open() && ! buf.close();
    if ( close_failed)
        clear( rdstate() | std::ios::badbit);
}



//-----------------------------------------------------------------------
// User classes - bgenstream (others inherit everything in header file!)
//-----------------------------------------------------------------------

//bgenstream::bgenstream( const char* name){
//    unsigned char buffer[4];
//    aA="";		//initialise string variables
//    aB="";		//initialise string variables
//    rsidS="";		//initialise string variables
//    currentSnp=0;	//initialise currentSnp to 0 to be able to increment
//    readVarHead=false;	//whether the head of the variant has been read but not the probabilities
//    input.open(file,ios::binary);	//open file
//    input.read((char*)(&offset),4);	//read offset
//    input.read((char*)(&LH),4);		//read LH
//    input.read((char*)(&M),4);		//read M
//    input.read((char*)(&N),4);		//read N
//    input.read((char*)(&buffer[0]),4);	//read magic number
//    ////////// TODO CHECK THAT MAGIC NUMBER IS CORRECT!!!!!!!!////////////////////
//    // check whether there's anything in the free data area, and if so skip it
//    if((LH-20)>0){
//        input.seekg((LH-20),ios_base::cur);
//    }
//    //read the flags
//    //Check which format the data is in
//    input.read((char*)(&buffer[0]),4);
//    compressed=((buffer[0] >> 0) & 1);	//CompressedSnpBlocks
//    layout=((buffer[0] >> 2) & 15);	//layout
//    sampleIds=((buffer[0]>>31) & 1);	//sampleIds
//    //Header block read so skip to the start of the genotypes
//    input.seekg((offset+4),ios_base::beg);
//}

int bgenstream::read_sample_id_block(){
	using namespace std;
	unsigned int LSI,Nind,LS;
	unsigned char buf[4];
	vector<unsigned char> sampleCurrent;
	buf[0]='\0';
	buf[1]='\0';
	buf[2]='\0';
	buf[3]='\0';
	if(sampleIds){
		input.read((char*)(&LSI),4);
		if(!(LSI+LH>offset)){
			cerr<<"ERROR: malformed sample block"<<endl;
			exit(-9);
		}
		input.read((char*)(&Nind),4);
		if(Nind!=N){
			cerr<<"Error: number of individiuals in sample block larger than number of individuals indicated in header block"<<endl;
			exit(-9);
		}
		sampleIdentifiers.resize(Nind);
		for(unsigned int i=0;i<Nind;++i){
			input.read((char*)(&buf),2);
			LS=(buf[1]<<8) | buf[0];
			sampleCurrent.clear();
			sampleCurrent.resize(LS);
			input.read((char*)(&sampleCurrent[0]),LS);
			sampleIdentifiers[i].resize(LS);
			sampleIdentifiers[i]=string(sampleCurrent.begin(),sampleCurrent.end());
		}
	}
	return sampleIds;
}

bool bgenstream::read_variant_id(){
        using namespace std;
        unsigned int lrsid;
        unsigned char buf[4];
        unsigned long int uncompressedLength;
	vector<unsigned char> alleleTemp;
        //if v1.1 read number of individuals
	if(layout==1){
                input.read((char*)(&nv),4);
        }
        //read variant name
        //vid
        //set all bytes to zero as we'll be reading less than 4 bytes next!
        buf[0]='\0';
        buf[1]='\0';
        buf[2]='\0';
        buf[3]='\0';
        input.read((char*)(&buf),2);
        lrsid=(buf[1]<<8) | buf[0];
        vid.clear();
        vid.resize(lrsid);
        input.read((char*)(&vid[0]),lrsid);
	vidS.clear();
	vidS=string(vid.begin(),vid.end());
//cout<<vidS<<endl;
        input.read((char*)(&buf),2);
        lrsid=(buf[1]<<8) | buf[0];
        rsid.clear();
        rsid.resize(lrsid);
        input.read((char*)(&rsid[0]),lrsid);
        rsidS.clear();
        rsidS=string(rsid.begin(),rsid.end());
//cout<<rsidS<<endl;
        //chr
        input.read((char*)(&buf),2);
        lrsid=(buf[1]<<8) | buf[0];
        chr.clear();
        chr.resize(lrsid);
        input.read((char*)(&chr[0]),lrsid);
        //position
        input.read((char*)(&pos),4);
//cout<<pos<<endl;
        //if it's version 1.2 check how many alleles there are
        if(layout==2){
                input.read((char*)(&buf),2);
                K=(buf[1]<<8) | buf[0];
		alleleString.resize(K);
        }else{
		alleleString.resize(2);
	}
        //allele A
        input.read((char*)(&lrsid),4);
        alleleA.clear();
        alleleA.resize(lrsid);
        input.read((char*)(&alleleA[0]),lrsid);
        aA.clear();
        aA=string(alleleA.begin(),alleleA.end());
	alleleString[0]=string(alleleA.begin(),alleleA.end());
//cout<<aA<<endl;
        //allele B
        input.read((char*)(&lrsid),4);
        alleleB.clear();
        alleleB.resize(lrsid);
        input.read((char*)(&alleleB[0]),lrsid);
        aB.clear();
        aB=string(alleleB.begin(),alleleB.end());
	alleleString[1]=string(alleleB.begin(),alleleB.end());
//cout<<aB<<endl;
        //if v1.2 read in the other alleles
        if(layout==2){
                for(int i=2;i<K;++i){
                        input.read((char*)(&lrsid),4);
                        alleleTemp.clear();
                        alleleTemp.resize(lrsid);
                        input.read((char*)(&alleleTemp[0]),lrsid);
			alleleString[i]=string(alleleTemp.begin(),alleleTemp.end());
                }
        }
        //if v1.2 read in length of rest of data block
        if(layout==2){
                input.read((char*)(&nbytes),4);
        }else if(layout==1){    //v1.1
                //length of compressed data block
                if(compressed){
                        input.read((char*)(&nbytes),4);
                }else{
                        nbytes=6*N;
                }
        }else{
                cerr<<"ERROR: only bgen version 1.1 and 1.2 supported"<<endl;
                exit(-7);
        }
        //check number of individuals is the same as the number of individuals in the header block, else exit
        if(layout==1){
	        if(nv!=N){
        	        cerr<<"ERROR: Snp ";
                	for(vector<unsigned char>::const_iterator i=rsid.begin();i!=rsid.end();++i){
                        	cerr<<*i;
	                }
        	        cerr<<" has fewer individuals than the bgen header block indicates. I don't know which individuals these are so exiting"<<endl;
//			cerr<<layout<<"\t"<<nv<<"\t"<<N<<endl;
	                exit(10);
		}
        }
        currentSnp++;
        probs.clear();
        readVarHead=true;
	return true;
}

void bgenstream::read_variant_probabilities(){
        using namespace std;
        unsigned long int uncompressedLength=0;
        unsigned char *uncompressedBuf;
	float probBuf,probNext,prob1,prob2;
        unsigned char *compressedData;
        int uncompressedValue;
        unsigned int nalleles,minp,maxp,samplePloidy,phased,nbit,sampleMissing,mask,shift,fByte,lByte;
	unsigned int intBuf;

        if(!readVarHead){
                read_variant_id();
        }
        probs.clear();
        if(layout==1){
                uncompressedLength=6*N;
                uncompressedBuf=new unsigned char[uncompressedLength];
                if(compressed){
                        compressedData=new unsigned char[nbytes];
                        if(!input) {
                                EXIT_WITH_ERROR(10, "input not available");
                        }
                        input.read((char*)(&compressedData[0]),nbytes);
                        if(!input) {
                                EXIT_WITH_ERROR(10, "input.read failed");
                        }
                        uncompressedValue=uncompress(uncompressedBuf,&uncompressedLength,(const Bytef*)compressedData,nbytes);
                        delete [] compressedData;
                        if(uncompressedValue!=Z_OK){
                                std:: cerr << "Z_BUF_ERROR"  " " << Z_BUF_ERROR << std:: endl;
                                std:: cerr << "Z_MEM_ERROR"  " " << Z_MEM_ERROR << std:: endl;
                                std:: cerr << "Z_DATA_ERROR" " " << Z_DATA_ERROR << std:: endl;
                                EXIT_WITH_ERROR(10, "Error reading variant, exiting!");
                        }
                }else{
                        input.read((char*)(&uncompressedBuf[0]),uncompressedLength);
                }
                for(unsigned int i=0;i<uncompressedLength;i+=2){
                        probBuf=(uncompressedBuf[i+1]<<8) | uncompressedBuf[i];
                        probs.push_back(probBuf/32768.0);
                }
                delete [] uncompressedBuf;
        }else if(layout==2){
                if(compressed){
			input.read((char*)(&uncompressedLength),4);
			uncompressedBuf=new unsigned char[uncompressedLength];
			nbytes-=4;
                        compressedData=new unsigned char[nbytes];
                        input.read((char*)(&compressedData[0]),nbytes);
                        uncompressedValue=uncompress(uncompressedBuf,&uncompressedLength,(const Bytef*)compressedData,nbytes);
                        delete [] compressedData;
                        if(uncompressedValue!=0){
                                EXIT_WITH_ERROR(10, "Error reading variant, exiting!");
                        }
                }else{
			uncompressedLength=nbytes;
			uncompressedBuf=new unsigned char[uncompressedLength];
                        input.read((char*)(&uncompressedBuf[0]),uncompressedLength);
                }
                //number of individuals in data block (must equal N)
                nv=(uncompressedBuf[3]<<24) | (uncompressedBuf[2]<<16) | (uncompressedBuf[1]<<8) | uncompressedBuf[0];
		if(nv != N){
			cerr<<"ERROR: number of individuals in data block differs from that indicated by the header block"<<endl;
			exit(-11);
		}
                //number of alleles (must equal K);
                nalleles=(uncompressedBuf[5]<<8) | uncompressedBuf[4];
		if(nalleles!=K){
			cerr<<"ERROR: number of alleles in data block differs from that indicated by the header block"<<endl;
			exit(-12);
		}
                //min ploidy
                minp=uncompressedBuf[6];
                //max ploidy
                maxp=uncompressedBuf[7];
                //is the data phased
                phased=uncompressedBuf[8+nv];
                //how many bits are used to store each probability
                nbit=uncompressedBuf[9+nv];
		mask=(pow(2,nbit)-1.0);
		if(minp==2 && maxp==2 && phased==0 && nalleles==2){
			for(unsigned int i=0;i<2*nv;i+=2){
				samplePloidy=0;
				sampleMissing=0;
				samplePloidy=((uncompressedBuf[8+i/2] >> 0) & 63);
				sampleMissing=((uncompressedBuf[8+i/2] >> 7) & 1);
				//read 2 (ploidy)
				fByte=10+nv+floor(i*nbit/8.0);  // work out which byte contains the start of my number
				lByte=10+nv+ceil((i+1)*nbit/8.0);       // work out which byte contains the end of my number
				intBuf=0;
				probBuf=0;
				shift=80+nv*8+(i*nbit)-fByte*8; // work out which bit in fByte the mask needs to start
				intBuf=(uncompressedBuf[fByte]>>shift);
				for(unsigned int j=1;j<=lByte-fByte-1;j++){     // loop from first->last
					intBuf=(uncompressedBuf[j+fByte] << (j*8-shift)) | static_cast<int>(intBuf);    //probBuf=probBuf | current byte
				}
				probBuf=intBuf & mask;  // shift relevant bytes to the right and mask
				probs.push_back(probBuf/mask);
				prob1=probBuf/mask;
				//do second probability
				fByte=10+nv+floor((1+i)*nbit/8.0);      // work out which byte contains the start of my number
				lByte=10+nv+ceil((2+i)*nbit/8.0);       // work out which byte contains the end of my number
				intBuf=0;
				probBuf=0;
				shift=80+nv*8+((1+i)*nbit)-fByte*8;     // work out which bit in fByte the mask needs to start
				intBuf=(uncompressedBuf[fByte]>>shift);
				for(unsigned int j=1;j<=lByte-fByte-1;j++){     // loop from first->last
					intBuf=(uncompressedBuf[j+fByte] << (j*8-shift)) | static_cast<int>(intBuf);    //probBuf=probBuf | current byte
				}
				probBuf=intBuf & mask;  // shift relevant bytes to the right and mask
				probs.push_back(probBuf/mask);
				prob2=probBuf/mask;
				if(sampleMissing==0){
					probs.push_back(1-prob1-prob2);
				}else{
					probs.push_back(0);
				}
			}
		}else{
			EXIT_WITH_ERROR(11,"ERROR: Not all samples are diploid - format not currently supported");
		}

                delete [] uncompressedBuf;
        }
        readVarHead=false;
}

// Greatest common divisor by Euclid's algorithm; nChoosekGcd(a, 0) == a.
static unsigned nChoosekGcd(unsigned a, unsigned b){
	while(b != 0){
		const unsigned remainder = a % b;
		a = b;
		b = remainder;
	}
	return a;
}

// Returns the binomial coefficient C(n, k): the number of ways to choose k
// items out of n. Returns 0 when k > n.
//
// The result is exact whenever C(n, k) fits in `unsigned`: common factors are
// cancelled before each multiplication, so no intermediate value exceeds the
// coefficient being built. Results that do not fit wrap around.
unsigned nChoosek(unsigned n, unsigned k){
	if(k > n) return 0;
	// Use the symmetry C(n,k) == C(n,n-k) to keep the loop short; written as
	// a subtraction so that it cannot wrap the way k*2 could.
	if(k > n-k) k=n-k;
	if(k == 0) return 1;

	unsigned result = n;	// C(n,1)
	for(unsigned int i=2;i<=k;++i){
		// C(n,i) = C(n,i-1) * (n-i+1) / i. After dividing result and i by
		// their gcd, what is left of i is coprime to result and therefore
		// divides (n-i+1) exactly.
		const unsigned multiplier = n-i+1;
		const unsigned common = nChoosekGcd(result, i);
		result = (result/common) * (multiplier/(i/common));
	}
	return result;
}

#ifdef GZSTREAM_NAMESPACE
} // namespace GZSTREAM_NAMESPACE
#endif

// ============================================================================
// EOF //
