#define VERSION_NAME "QUICKTEST"
#define VERSION_NUMBER "0.99b"
#define VERSION_DATE "Sep 21 2016"

// QUICKTEST: statistical analysis of uncertain genotypes
// Copyright 2007, 2008, 2009, Toby Johnson, Zoltan Kutalik, Alisa Manning and Han Chen

// QUICKTEST is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published
// by the Free Software Foundation; either version 3 of the License,
// or (at your option) any later version.

// QUICKTEST is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// The GNU General Public License can be viewed at
// http://www.gnu.org/licenses


// QUICKTEST was written by
//
// Toby Johnson            and   Zoltan Kutalik
// Toby.Johnson@unil.ch    and   Zoltan.Kutalik@unil.ch
//
// Alisa Manning           and   Han Chen
// amanning@bu.edu         and   hanchen@bu.edu

// Rob Beaumont            and   Aaron McDaid
// R.Beaumont@exeter.ac.uk and   aaron.mcdaid@gmail.com

// QUICKTEST must be compiled using -lz -llapack -lRmath
// This requires a system with libRmath, and F77 lapack routines

// Version history
//
// v0.1: first version
//
// v0.2: changes:
// reads SNPTEST format sample file (phenotype file; with the two header lines)
//
// v0.3: changes:
// implementation of EM algorithm to find MLE, Wald test and LRT
// implementation of Marchini et al.'s score test
// replaced simulation method by MCMC method
// complete change to command line argument structure
// transparently reads .gz genotype files if necessary
// added permutation tests for mean method
// added --only and --copy options
//
// v0.4: changes:
// fixed computation of rSqHat
// use --missing-code -9 --ooops-line, or --snptest to read SNPTEST .sample file; 
//   otherwise (default) would expect more like R write.table(x,row.names=F,...)
// implemented estimation of normal mixture model for phenotype, and 
//    ``corresponding'' EM algorithm, mean and score methods for estimation of beta
// implemented Newton method as alternative to EM
// implemented ``Pettitt'' rank-likelihood based method
//
// v0.5: changes:
// developed hybrid Newton-EM algorithm
//
// v0.6: changes:
// changed options (--method-MLE) and headers of output file
//
// v0.7: changes:
// made possible simultaneous QQnormal and normal method
// reorganised many command line options and output column names
// implemented interaction tests for a single covariate
// added --copy-calls option


// v0.90: first release version
// v0.91: fixed bug in method_logistic (matrix a was destroyed and not rebuilt)
// v0.92: made treatment of covariates more consistent:
//         most methods calculate and use residual phenotype, except
//         method-interaction and method-binary
// v0.93: fixed bug in method_interaction that gave incorrect p-values
//         when multiple covariates used           
// v0.94: added --test-HW
// v0.95: added --method-robust to estimate robust variance-covariance matrix
//               by Alisa Manning (amanning@bu.edu) and Han Chen (hanchen@bu.edu)
//               This option can NOT be used unless --method-interaction is used
//        fixed memory leak in method_robust and method_interaction
//        initialised arrays in method_robust to zero
//        added include <cstring>, include <algorithm>
//           and fixed some implicit casts that generate warnings with some g++
//
// v0.96: added --method-poe to estimate parent of origin effect 
//                           
// v0.97: added --method-multiSNP option
//        added --loci option
//
//
// v1.00: added --bgen option for UK Biobank (Rob Beaumont & Aaron McDaid)
//
//
// to do:
//    --coding    specifies how to code the genotypes
//                  a means 0/1/2 for AA/AB/BB
//                  d means 0/1/1 for AA/AB/BB
//                  r means 0/0/1 for AA/AB/BB
//                  c means 1/0/1 for AA/AB/BB (e.g. for CNV states <2/2/>2)
//
// trap if --perm-verbose is going to request an unreasonable amount of RAM...
//
// perform SAM in RAM if desired (needs pre-estimate of number of SNPs)
// 2.5e6 * 100 = 2.5e8 doubles = 2e9 bytes = 2Gb is possible
//
// spew all printout into a string, so that we can print it to screen, log file, and output file too...

#include <cstdlib>  
#include <iostream>
#include <fstream>
#include <sstream>
#include <ctime>

//STL
#include <list>
#include <vector>
#include <iterator>

using namespace std; 

#include <ext/hash_set>
using namespace __gnu_cxx;


#define MATHLIB_STANDALONE
#include <Rmath.h>

#include <cmath>

#include "gzstream.h"
#include "snphwe.h"
#include "as.h"
#include "quicktest.h"
#include "stats.h"


#include "macros.hh"

#include "utils.hh"
using utils:: vec_of_vecs;


// function prototypes



// Reads the phenotype file described by *io.
// As used by main(): after the call, *phenotype holds one value per
// successfully read phenotype and *covariate holds the covariate values
// (main() pops *numCovar covariate values for each phenotype, in order);
// main() then treats *numCovar as the number of covariate columns.
// Returns the number of phenotypes read successfully; main() exits with
// status 1 when the return value is 0.
// NOTE(review): *phenotypeokay presumably flags, per input row, whether the
// phenotype was usable -- confirm against the definition.
int read_phenotype_file (io_spec* io, list<double>* phenotype, list<bool>* phenotypeokay,
			 int* numCovar, list<double>* covariate);

// NOTE(review): presumably reads initial values for a bigK-component normal
// mixture (weights *pi, means *mu, standard deviations *sigma) from the file
// given with --mix-hint (io->mixhintFilename) -- confirm against the
// definition; the meaning of the int return value is not visible here.
int read_mix_hint (io_spec* io, int bigK, vector<double>* pi, vector<double>* mu, vector<double>* sigma);





int main (int argc, char** argv) {

  // startup message

  cout << endl;
  cout << "############################################################" << endl;
  cout << "#" << endl;
  cout << "# " << VERSION_NAME << " version " << VERSION_NUMBER << endl;
  cout << "# by  Toby Johnson           and  Zoltan Kutalik" << endl;
  cout << "#    <Toby.Johnson@unil.ch>      <Zoltan.Kutalik@unil.ch>" << endl;
  cout << "# source code last modified on " << VERSION_DATE << endl;
  cout << "# compiled at " << __TIME__ << " on " __DATE__ << endl;
  cout << "#" << endl;
  cout << "############################################################" << endl;
  cout << "#" << endl;

  // following structure contains choices about analysis method(s) to
  // use and related options, one or more can be selected.  default
  // constructor sets all methods to false
  method_spec method; 

  // following structure contains choices about input/output
  io_spec io; //

  // how to code genotypes for linear model
  string coding = "a";
  // should generalise to specify number of genotypes and arbitrary coding....
  
  // should allow ``hack'' where p-values come from normalised trait, 
  // but beta comes from raw trait, and SE is hack-back-calculated

  // for Simulation method, should be part of method_spec
  int nsims = -999; // set to negative number so can trap if not set by user

  // Prior specification, default corresponds to classical approach
  double priorDF = -2.;
  double ppbeta = 0.;

  // Options for analysing permuted phenotypes
  bool BesagClifford = false; // --perm-Besag-Clifford
  int threshBesagClifford = 0; // number of successes needed to terminate
  int maxPermutations = 0;  // maximum number of trials

  bool permVerbose = false; // --perm-verbose
  int numPermutations = 0;

  bool onlyFilter = false;
  string onlyFilename;

  bool lociFilter = false;
  string lociFilename;

  bool option__ukbb = false;

  bool bgen = false;

  // parse command line arguments, throw immediate error if bad

  cout << "# program " << argv[0] << " running" << endl;
  cout << "# with following command line arguments:" << endl;
  cout << "#" << endl;
  
  string argstr;
  for (int arg = 1; arg < argc; ++arg) {
    argstr = argv[arg];
    if (argstr.length() < 3 || argstr.substr(0,2) != "--") {
      cerr << "!" << endl;
      cerr << "! fatal error: command line argument [ " << argstr << " ] not recognised" << endl;
      cerr << "!" << endl;
      return(1);
    } else {
      argstr = argstr.substr(2,argstr.length()-2); // chop off the initial --

      //////////////////////////////////////
      //
      // input and output file specification

      if (argstr == "geno") {
	// --geno specifies genotype file name
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --geno requires genotype filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.genotypeFilelist->push_back(argv[arg]);
	  cout << "#           --geno " << io.genotypeFilelist->back() << endl;
	}
	
      } else if (argstr == "only") {
	// --only specifies inclusion list file name
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --only requires inclusion list filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  onlyFilter = true;
	  onlyFilename = argv[arg];
	  cout << "#           --only " << onlyFilename << endl;
	}

      }	else if (argstr == "loci") {
	// --loci specifies inclusion list file name
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --loci requires inclusion list filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  lociFilter = true;
	  lociFilename = argv[arg];
	  cout << "#           --loci " << lociFilename << endl;
	}
	
      } else if (argstr == "pheno") {
	// --pheno specifies phenotype file name
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --pheno requires phenotype filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.phenotypeFilename = argv[arg];
	  cout << "#           --pheno " << io.phenotypeFilename << endl;
	}

      } else if (argstr == "npheno") {
	// --npheno specifies name or number of phenotype to analyse
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --npheno requires value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.nPheno = argv[arg];
	  cout << "#           --npheno " << io.nPheno << endl;
	}

      } else if (argstr == "ooops-line") {
	cout << "#           --ooops-line" << endl;
	io.specialOoopsLine = true;
	
      } else if (argstr == "missing-code") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --missing-code requires numerical value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.missingCode = argv[arg];
	  io.specialMissingCode = true;
	  cout << "#           --missing-code " << io.missingCode << endl;
	}

      } else if (argstr == "snptest") {
	cout << "#           --snptest" << endl;
	io.specialOoopsLine = true;
	io.missingCode = "-9";
	io.specialMissingCode = true;
	
      } else if (argstr == "exclude") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --exclude requires exclusion list filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.exclusions = true;
	  io.exclusionsFilename = argv[arg];
	  cout << "#           --exclude " << io.exclusionsFilename << endl;
	}

      } else if (argstr == "ignore-ties") {
	cout << "#           --ignore-ties" << endl;
	io.ignoreTies = true;

      } else if (argstr == "out") {
	// --out specifies output file name
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --out requires output filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.outputFilename = argv[arg];
	  cout << "#           --out " << io.outputFilename << endl;
	}

      } else if (argstr == "copy") {
	// --copy specifies file name for copying genotype probabilities
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --copy requires copyfile filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  method.copy = true;
	  method.copyFilename = argv[arg];
	  cout << "#           --copy " << method.copyFilename << endl;
	}

      } else if (argstr == "copy-calls") {
	// --copy-calls alters behaviour of --copy
	cout << "#           --copy-calls" << endl;
	method.copycalls = true;

      } else if (argstr == "copy-dose") {
	// --copy-dose alters behaviour of --copy
	cout << "#           --copy-dose" << endl;
	method.copydose = true;

//       } else if (argstr == "coding") {
// 	// --coding specifies coding of genotypes
// 	++arg;
// 	if (arg==argc) {
// 	  cerr << "!" << endl;
// 	  cerr << "! fatal error: command line argument --coding requires argument" << endl;
// 	  cerr << "!" << endl;
// 	  return(1);
// 	} else {
// 	  coding = argv[arg];
// 	  cout << "#           --coding " << coding << endl;
// 	}

      } else if (argstr == "no-progress") {
	cout << "#           --no-progress" << endl;
	io.progress = false;


	/////////////////////////////////////////////////////
	//
	// methods for phenotype modelling and transformation

      } else if (argstr == "no-normal") {
	cout << "#           --no-normal" << endl;
	method.normal = false;

      } else if (argstr == "mixture") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --mixture requires integer >1 value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  istringstream iss (argv[arg]);
	  if (iss >> method.mixBigK && method.mixBigK > 0) {
	    cout << "#           --mixture " << method.mixBigK << endl;
	    method.mix = true;
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --mixture requires integer >1 value" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}

      } else if (argstr == "mix-hint") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --mix-hint requires filename" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.mixhint = true;
	  io.mixhintFilename = argv[arg];
	  cout << "#           --mix-hint " << io.mixhintFilename << endl;
	}

      } else if (argstr == "mix-centered") {
	cout << "#           --mix-centered" << endl;
	method.mixCentered = true;

      } else if (argstr == "qqnormal") {
	cout << "#           --qqnormal" << endl;
	method.qqnormal = true;
	io.quantileNormalise = true;

	///////
	//
	// methods for assessing genotype certainty and other summary stats

      } else if (argstr == "compute-MAF") {
	cout << "#           --compute-MAF" << endl;
	method.MAF = true;
	
      } else if (argstr == "compute-rSqHat") {
	cout << "#           --compute-rSqHat" << endl;
	method.rSqHat = true;
	
      } else if (argstr == "compute-alphaHat") {
	cout << "#           --compute-alphaHat" << endl;
	method.alphaHat = true;
	
      } else if (argstr == "test-HW") {
	cout << "#           --test-HW" << endl;
	method.Hardy = true;
	
	////////
	//
	// methods for dealing with uncertain genotypes
	
      } else if (argstr == "method-call") {
	cout << "#           --method-call" << endl;
	method.call = true;
	
      } else if (argstr == "method-mean") {
	cout << "#           --method-mean" << endl;
	method.mean = true;
	
      } else if (argstr == "method-score") {
	cout << "#           --method-score" << endl;
	method.score = true;

      } else if (argstr == "method-ML") {
	cout << "#           --method-ML" << endl;
	method.ML = true;

      } else if (argstr == "method-MCMC") {
	cout << "#           --method-MCMC" << endl;
	method.MCMC = true;

      } else if (argstr == "method-multiSNP") {
	cout << "#           --method-multiSNP" << endl;
	method.multiSNP = true;
	
      } else if (argstr == "call-thresh") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --call-thresh requires numerical value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  istringstream iss (argv[arg]);
	  if (iss >> method.callThresh) {
	    cout << "#           --call-thresh " << method.callThresh << endl;
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --call-thresh requires numerical value" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}

      } else if (argstr == "test-beta") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --test-beta requires numerical value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  istringstream iss (argv[arg]);
	  if (iss >> method.testBeta) {
	    method.test = true;
	    cout << "#           --test-beta " << method.testBeta << endl;
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --test-beta requires numerical value" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}

      } else if (argstr == "perm-Besag-Clifford") {

	if (arg >= argc-2) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --perm-Besag-Clifford requires integers 0 < I1 <= I2" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  ++arg;
	  istringstream iss (argv[arg]);
	  if (iss >> threshBesagClifford && threshBesagClifford > 0) {
	    ++arg;
	    istringstream iss (argv[arg]);
	    if (iss >> maxPermutations && maxPermutations > threshBesagClifford) {
	      cout << "#           --perm-Besag-Clifford " << threshBesagClifford << " " << maxPermutations << endl;
	      BesagClifford = true;
	    } else {
	      cerr << "!" << endl;
	      cerr << "! fatal error: command line argument --perm-Besag-Clifford requires integers 0 < I1 <= I2" << endl;
	      cerr << "!" << endl;
	      return(1);
	    }
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --perm-Besag-Clifford requires integers 0 < I1 <= I2" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}

      } else if (argstr == "perm-verbose") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --perm-verbose requires integer value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  istringstream iss (argv[arg]);
	  if (iss >> numPermutations && numPermutations > 0) {
	    cout << "#           --perm-verbose " << numPermutations << endl;
	    permVerbose = true;
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --perm-verbose requires integer value" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}

      } else if (argstr == "sim-num") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --sim-num requires integer value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  istringstream iss (argv[arg]);
	  if (iss >> nsims && nsims > 0) {
	    cout << "#           --sim-num " << nsims << endl;
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --sim-num requires integer value" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}



      } else if (argstr == "sim-seed") {
	if (arg >= argc-2) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --sim-seed requires two integer values" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  ++arg;
	  istringstream iss (argv[arg]);
	  if (iss >> method.seed1 && method.seed1 > 0) {
	    ++arg;
	    istringstream iss (argv[arg]);
	    if (iss >> method.seed2 && method.seed2 > 0) {
	      cout << "#           --seed " << method.seed1 << " " << method.seed2 << endl;
	    } else {
	      cerr << "!" << endl;
	      cerr << "! fatal error: command line argument --sim-seed requires two integer values" << endl;
	      cerr << "!" << endl;
	      return(1);
	    }
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --sim-seed requires two integer values" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}

	// special options for binary analysis

      } else if (argstr == "method-binary") {
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --method-binary requires numerical value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  istringstream iss (argv[arg]);
	  if (iss >> method.binThresh) {
	    method.binary = true;
	    cout << "#           --method-binary " << method.binThresh << endl;
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error: command line argument --method-binary requires numerical value" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}


	// special options for interaction

      } else if (argstr == "method-interaction") {
	cout << "#           --method-interaction" << endl;
	method.interaction = true;

      } else if (argstr == "method-robust") {
	cout << "#           --method-robust" << endl;
	method.robust = true;

      } else if (argstr == "ncovar") {
	// io.nCovar specifies name or number of interaction covariate to analyse
	++arg;
	if (arg==argc) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: command line argument --ncovar requires value" << endl;
	  cerr << "!" << endl;
	  return(1);
	} else {
	  io.nCovar->push_back(argv[arg]);
	  cout << "#           --ncovar " << io.nCovar->back() << endl;
	}


	//special options for parent-of-origin testing
      } else if (argstr=="method-poe") {
	cout << "#           --method-poe" << endl;
	method.poe = true;



//       } else if (argstr == "prior-df") {
// 	++arg;
// 	if (arg==argc) {
// 	  cerr << "!" << endl;
// 	  cerr << "! fatal error: command line argument --prior-df requires numerical value" << endl;
// 	  cerr << "!" << endl;
// 	  return(1);
// 	} else {
// 	  istringstream iss (argv[arg]);
// 	  if (iss >> priorDF) {
// 	    cout << "#           --prior-df " << priorDF << endl;
// 	  } else {
// 	    cerr << "!" << endl;
// 	    cerr << "! fatal error: command line argument --prior-df requires numerical value" << endl;
// 	    cerr << "!" << endl;
// 	    return(1);
// 	  }
// 	}

//       } else if (argstr == "prior-precbeta") {
// 	++arg;
// 	if (arg==argc) {
// 	  cerr << "!" << endl;
// 	  cerr << "! fatal error: command line argument --prior-precbeta requires nonnegative numerical value" << endl;
// 	  cerr << "!" << endl;
// 	  return(1);
// 	} else {
// 	  istringstream iss (argv[arg]);
// 	  if (iss >> ppbeta && ppbeta >=0 ) {
// 	    cout << "#           --prior-precbeta " << ppbeta << endl;
// 	  } else {
// 	    cerr << "!" << endl;
// 	    cerr << "! fatal error: command line argument --prior-precbeta requires nonnegative numerical value" << endl;
// 	    cerr << "!" << endl;
// 	    return(1);
// 	  }
// 	}
	
	}
	else if (argstr=="ukbb") {
		cout << "#           --ukbb" << endl;
		option__ukbb = true;
		/* 22 22:16050075_A_G rs587697622 16050075 A G 1 0 0 1 0 0 1 0 0 1 0 0 1 ......
		 *
		 * First 6 columns are: chr, then ID, then rs#, position, allele1, allele2.
		 */
		// 1  snp1            1                    A T 0 0 1 0 0 1 0
        }
    else if (argstr == "bgen") {
	// --bgen specifies that genotypeFilelist is in bgen format
		cout << "#           --bgen" << endl;
		bgen = true;
	}
	else {
	cerr << "!" << endl;
	cerr << "! fatal error: command line argument [ --" << argstr << " ] not recognised" << endl;
	cerr << "!" << endl;
	return(1);
      }	
    }
  }


  // sanity checking of command line arguments, 
  // more helpful to report *all* errors at once, and then die at the end of checking

  bool die = false;

  if (io.phenotypeFilename == "") {
    cerr << "!" << endl;
    cerr << "! fatal error: must set phenotype filename using --pheno command line argument" << endl;
    die = true;
  }
  if (io.outputFilename == "") {
    cerr << "!" << endl;
    cerr << "! fatal error: must set output filename using --out command line argument" << endl;
    die = true;
  }
  if (method.MCMC && nsims <= 0) {
    cerr << "!" << endl;
    cerr << "! fatal error: must set number of simulations using --sim-num command line argument" << endl;
    die = true;
  }
  if (permVerbose && BesagClifford) {
    cerr << "!" << endl;
    cerr << "! fatal error: cannot use both --perm-verbose and --perm-Besag-Clifford" << endl;
    die = true;
  }
  if ((permVerbose || BesagClifford) && method.test) {
    cerr << "!" << endl;
    cerr << "! fatal error: cannot use any --perm-XXX with --test-beta" << endl;
    die = true;
  }
  if (method.MCMC && method.mix) {
    cerr << "!" << endl;
    cerr << "! fatal error: cannot use any --method-MCMC with --mixture (sorry!)" << endl;
    die = true;
  }
  if (!method.normal && !method.mix && !method.qqnormal) {
    cerr << "!" << endl;
    cerr << "! fatal error: cannot use --no-normal without using either --mixture or --qqnormal" << endl;
    die = true;
  }
  if (method.interaction && io.nCovar->empty()) {
    cerr << "!" << endl;
    cerr << "! fatal error: must use --ncovar to set interaction covariate name when using --method-interaction" << endl;
    die = true;
  }
  if (method.interaction && method.binary) {
    cerr << "!" << endl;
    cerr << "! fatal error: cannot use --method-interaction and --method-binary simultaneously" << endl;
    die = true;
  }
  if (method.robust && !method.interaction) {
    cerr << "!" << endl;
    cerr << "! fatal error: cannot use --method-robust when NOT using --method-interaction" << endl;
    die = true;
  }
  

  if (die) {
    cerr << "!" << endl;
    return(1);
  }

  // test that at least one analysis method or action is going to be used  

  if (io.genotypeFilelist->empty()) {
    cout << "!" << endl;
    cout << "! warning : no genotype filename(s) provided" << endl;
    cout << "!         : use --geno command line argument" << endl; 
    cout << "!         :  otherwise, no association analysis will be computed" << endl; 
    cout << "!" << endl;
  }

  if (method.interaction && !method.mean) {
    cout << "!" << endl;
    cout << "! warning: using --method-mean because you used --method-interaction" << endl;
    cout << "!" << endl;
    method.mean = true;
  }

  if (!(method.copy || method.call || method.mean || method.score || method.ML || method.MCMC || method.binary || method.multiSNP || method.poe)) {
    cout << "!" << endl;
    cout << "! warning : no analysis method or action was specified" << endl;
    cout << "!         : use --method-XXX, see documentation for choices of XXX" << endl;
    cout << "!         :  otherwise, only basic summary statistics will be computed" << endl;
    cout << "!" << endl;
  }

  if (method.call && method.mix) {
    cout << "!" << endl;
    cout << "! warning: --method-call does not work with --mixture (sorry!)" << endl;
    cout << "!" << endl;
  }

  if (method.MCMC && method.mix) {
    cout << "!" << endl;
    cout << "! warning: --method-MCMC does not work with --mixture (sorry!)" << endl;
    cout << "!" << endl;
  }

  if ((method.copy || method.call || method.score || method.ML || method.MCMC || method.binary || method.poe || method.mix) && method.multiSNP) {
    cerr << "!" << endl;
    cerr << "! fatal error: --method-multiSNP can only be combined with --method-mean OR --qqnormal (sorry!)" << endl;
    cerr << "!" << endl;
    return(1);
  }

  if ((method.copy || method.call || method.score || method.ML || method.MCMC || method.binary || method.multiSNP || method.mix) && method.poe) {
    cerr << "!" << endl;
    cerr << "! fatal error: --method-poe can only be combined with --method-mean  OR --qqnormal (sorry!)" << endl;
    cerr << "!" << endl;
    return(1);
  }

  /////
  //
  // print messages about analysis that is planned

  cout << "#" << endl;
  cout << "# expecting to process [ " << io.genotypeFilelist->size() << " ] genotype files" << endl;

  if (method.call) {
    cout << "# running analysis using calling method (best guess genotypes)" << endl;
    cout << "#    using calling threshold [ " << method.callThresh << " ]" << endl;
  }
  if (method.mean) cout << "# running analysis using mean method (expected genotype dosages)" << endl;
  if (method.score) cout << "# running analysis using score method (quadratic approximation)" << endl;
  if (method.ML) cout << "# running analysis using MLE/LRT method" << endl;
  if (method.MCMC) {
    cout << "# running analysis using Markov chain Monte Carlo simulations" << endl;
    cout << "#    using [ " << nsims << " ] simulations per SNP" << endl;
  }
  if (method.multiSNP) cout << "# running multi-SNP analysis using mean method" << endl;
  if (method.interaction) cout << "# running interaction analysis using mean method" << endl;
  if (method.robust) cout << "# using robust method to estimate variance-covariance matrix" << endl;  
  if (method.binary) {
    cout << "# running binary analysis using logistic mean method" << endl;
  }
  if (method.poe) {
    cout << "# running parent-of-origin test using mean method" << endl;
  }

//   if (priorDF == -2. && ppbeta == 0.) {
//     cout << "# all inferences are classical" << endl;
//   } else {
//     cout << "# all inferences are Bayesian" << endl;
//     cout << "#    prior is NIG with a = 0, d = [ " << priorDF << " ], m = (0 0), 1/V = (0 0; 0 [ " << ppbeta << " ])" << endl;
//   }

  if (method.MCMC || method.mix || permVerbose || BesagClifford) {
    cout << "# seeded random number generator using [ "<<method.seed1<<" "<<method.seed2<<" ]" << endl;
    // needed for simulation and MCMC methods, permutation, EM initial guesses,
  }
  set_seed(method.seed1,method.seed2);

  // read phenotype file (R or SNPTEST format)
  // count number of non-NA phenotypes and remember which columns they are

  list<double> phenotype;
  list<bool> phenotypeokay;
  list<double> covariate;

  int nCov = 0;
  int n = read_phenotype_file(&io, &phenotype, &phenotypeokay, &nCov, &covariate);
  // returns number of phenotypes read successfully
  if (n == 0) return(1);

  int nVar = nCov+3;
  double* yRaw = new double [n]; // the phenotype as read from the input file
  vec_of_vecs vars(nVar,n); // access with [ii*n + jj] => (i,j)
  // columns 0..nCov-1 are the covariates
  // column  nCov      will be all 1's
  // column  nCov+1    will be used for genotype
  // column  nCov+2    will be used for genotype*var for interactions
  vec_of_vecs vars2(nVar,nVar);
  vector<double> varty(nVar);
  // this is t(var) %*% yRaw

  for (int i = 0; i < n; ++i) {
    // check for internal error
    if (phenotype.empty()) {
      cerr << "!" << endl;
      cerr << "! internal error : could not pop phenotype off list, list is empty!" << endl;
      cerr << "!" << endl;
      return (1);
    }
    yRaw[i] = phenotype.front();
    phenotype.pop_front();

    for (int ii = 0; ii < nCov; ++ii) {
      if (covariate.empty()) {
	cerr << "!" << endl;
	cerr << "! internal error : could not pop covariate off list, list is empty!" << endl;
	cerr << "!" << endl;
	return (1);
      }
      vars(ii, i) = covariate.front();
      covariate.pop_front();
    }
    vars(nCov, i) = 1.;
  }
  // check for internal errors
  if (!phenotype.empty()) {
    cerr << "!" << endl;
    cerr << "! internal error : phenotypes popped off list but list not empty!" << endl;
    cerr << "!" << endl;
    return (1);
  }
  if (!covariate.empty()) {
    cerr << "!" << endl;
    cerr << "! internal error : covariates popped off list but list not empty!" << endl;
    cerr << "!" << endl;
    return (1);
  }

  vector<double> yOrig (n); // == yRaw but vector rather than array
  vector<double> yResid (n); // residuals after regressing onto covariates
  vector<double> yNorm (n); // yResid, qq normalised
  vector<int> yRank (n); // ranks of yResid in UNSTABLE sort

  double sumyyOrig; // really is sum of *non-centered* yOrig
  double sumyy;
  double sumyyNorm;

  for (int i = 0; i < n; ++i) yOrig[i] = yRaw[i];

  if (!preprocess_phenotype (&io, yRaw, n, vars, vars2, varty, nCov, nVar,
			     &yResid, &yNorm, &yRank, &sumyyOrig, &sumyy, &sumyyNorm)) {
    exit(100);
  }

  vector<double> pi (method.mixBigK);
  vector<double> mu (method.mixBigK);
  vector<double> sigma (method.mixBigK);
  vector<double> piRef (method.mixBigK);
  vector<double> muRef (method.mixBigK);
  vector<double> sigmaRef (method.mixBigK);
  double alphaMLE;
  int foonits; double foodiff; // we don't care about return values
  if (method.mix) {

    if (io.mixhint) {
      if (!read_mix_hint(&io, method.mixBigK, &pi, &mu, &sigma)) {
	cerr << "!" << endl;
	cerr << "! fatal error : parse failed" << endl;
	cerr << "!" << endl;
	return (1);
      }
      if (!refit_mixture (yResid, n, sumyy, method.mixBigK, false, &pi, &mu, &sigma, true) ||
	  !method_mix_null (yResid, n, method.mixBigK, pi, mu, sigma, &foonits, &alphaMLE, &foodiff, true)) {
	cerr << "!" << endl;
	cerr << "! fatal error : failed to fit mixture model" << endl;
	cerr << "!" << endl;
	return (1);
      }
    } else {
      if (!fit_mixture (yResid, n, sumyy, method.mixBigK, method.mixCentered, &pi, &mu, &sigma, &io, true) ||
	  !method_mix_null (yResid, n, method.mixBigK, pi, mu, sigma, &foonits, &alphaMLE, &foodiff, true)) {
	cerr << "!" << endl;
	cerr << "! fatal error : failed to fit mixture model" << endl;
	cerr << "!" << endl;
	return (1);
      }
    }
    for (int k = 0; k < method.mixBigK; ++k) {
      piRef[k] = pi[k];
      muRef[k] = mu[k];
      sigmaRef[k] = sigma[k];
    }
  }

  vector<double> pcmL1 (n);
  vector<double> pcmL2 (n);
  ///// THIS IS ALSO NEEDED FOR THE SCORE MIXTURE MEAN GENOTYPE METHOD
  if (method.score && method.mix) {
    precompute_score_mix (&pcmL1, &pcmL2, yResid, n, pi, mu, sigma, method.mixBigK);
  }

  if (BesagClifford) {
    // max storage to be used for y and y2 will be 10^8 doubles
    numPermutations = min(int(5e7/n), maxPermutations);
  }
  if (method.test) {
    numPermutations = 1;
  }

  vector<double>** y = new vector<double>* [numPermutations+1];
  vector<double>** y2 = new vector<double>* [numPermutations+1];

  // now, (*y[p])[i] will be the phenotype for the p-th permutation, in the i-th individual
  // the 0-th permutation is not permuted, i.e. is the observed data
  y[0] = new vector<double> (n);
  for (int i = 0; i < n; ++i) {
    (*y[0])[i] = yResid[i];
  }

  y2[0] = new vector<double> (n);
  for (int i = 0; i < n; ++i) {
    (*y2[0])[i] = pow2((*y[0])[i]);
  }


  // we use this for any permutation on the integers {0,1,...,n-1}
  // declared with scope all of main(), since will be needed in main loop 
  // if Besag--Clifford method exceeds the cache of permutations
  vector<int> iPerm (n);
  for (int i = 0; i < n; ++i) {
    iPerm[i] = i;
  }

  // make permuted phenotype vectors
  if (method.test || permVerbose || BesagClifford) {
    cout << "# pre-generating [ " << numPermutations << " ] permutations of phenotypes" << endl;
    for (int p = 1; p <= numPermutations; ++p) {
      y[p] = new vector<double> (n);
      y2[p] = new vector<double> (n);
      random_shuffle(iPerm.begin(), iPerm.end(), randInt);
      for (int i = 0; i < n; ++i) {
	(*y[p])[i] = (*y[0])[iPerm[i]];
	(*y2[p])[i] = (*y2[0])[iPerm[i]];
      }
    }
  }

  double* normordBigA = NULL;
  double* normordSmallA = NULL;
  if (method.Pettitt) {
    cout << "# precomputing covariance matrix for Pettitt rank likelihood method" << endl;
    normordBigA = new double [(n+1)*(n+1)];
    normordSmallA = new double [n+1];
    if (precompute_Pettitt (normordBigA, normordSmallA, n)) {
      cerr << "! failed" << endl;
      return (101);
    }
  }

  ////// up to here okay
  
  if (method.interaction) {
    if (nCov == 0) {
      cerr << "!" << endl;
      cerr << "! fatal error : no covariates read, but at least one is needed for --method-interaction" << endl;
      cerr << "!" << endl;
      return (102);
    } else if (nCov > 1) {
      cout << "!" << endl;
      cout << "! warning : only first covariate [ " << io.nCovar->front() << " ] will be tested for interaction" << endl;
      cout << "!         : other covariates are used for adjustment only" << endl;
      cout << "!" << endl;
    }
  }


  vector<double> meanG (n); // used for method_mean, interaction and logistic
  double beta; // beta-hat
  double se;
  double pval;
  double varAA;
  double varAB;
  double varBB;
  vector<double> logipars (nCov+1);
  double betasnp;
  double sesnp;
  double pvalsnp;
  double covsnpinteraction;

  // all declared here because needed as dummy (untouched) argument for prefitting logistic model


  vector<int> yBin (n);
  int numBinOne = 0;
  if (method.binary) {
    for (int i = 0; i < n; ++i) {
      yBin[i] = yOrig[i] >= method.binThresh ? 1 : 0;
      numBinOne += yBin[i];
    }      
    cout << "# for binary phenotype, " << numBinOne << " individuals have phenotype >= " << method.binThresh << endl;
    cout << "#                   and " << n-numBinOne << " individuals have phenotype < " << method.binThresh << endl;
    if (numBinOne == 0 || numBinOne == n) {
      cerr << "!" << endl;
      cerr << "! fatal error : no variation in binary trait" << endl;
      cerr << "!" << endl;
      return(500);
    }

    int foonits = method_logistic(yBin, meanG, n, vars, nCov, &logipars, true, &beta, &se, &pval);
    if (foonits == 0) {
      cerr << "!" << endl;
      cerr << "! fatal error : failed to fit logistic model" << endl;
      cerr << "!" << endl;
      return(250);
    }
    cout << "# prefitted logistic model using " << foonits << " Fisher scoring iterations" << endl;
    { int ii = 0;
      if (logipars[nCov] < 0) {
	cout << "# logit(risk) = -" << -logipars[nCov] << endl;
      } else {
	cout << "# logit(risk) =  " << logipars[nCov] << endl;
      }
      for (list<string>::iterator i = io.nCovar->begin(); i != io.nCovar->end(); ++i) {
	// die if (ii >= nCov)
	if (logipars[ii] < 0) {
	  cout << "#               -" << -logipars[ii] << " * [ " << *i << " ]" << endl;
	} else {
	  cout << "#               +" << logipars[ii] << " * [ " << *i << " ]" << endl;
	}
	++ii;
      }
    }
  }
  
  // make a hash table to store the list of SNPs to analyse
  // onlyList must have scope all of main()
  hash_set<const char*, __gnu_cxx::hash<const char*>, strEq> onlyList;

  if (onlyFilter) {
    ifstream onlyfile (onlyFilename.c_str());
    if (!onlyfile) {
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open inclusion list file (--only option)" << endl;
      cerr << "!" << endl;
      return (1);
    }
    cout << "# reading inclusion list [ " << onlyFilename << " ]" << endl;
    string token;
    while (onlyfile >> token) {
      if (onlyList.find(token.c_str()) == onlyList.end()) {
	char* str = new char [token.length()+1];
	strcpy(str, token.c_str());
	onlyList.insert(str);
      }
    }
    onlyfile.close();
    cout << "# read " << onlyList.size() << " items for inclusion" << endl;
    
    // ? how to deallocate memory - a mess!  cannot just iterate through onlyList
    // because delete [] (*it) invalidates iterator....
    // maybe need to move from hash_set to list for that ...?
  }

  list<string> orgList;
  list<string> lociNameList;
  list<string> lociSnpList;
  list<string>::iterator oit;
  list<string>::iterator oit1;
  hash_set<const char*, __gnu_cxx::hash<const char*>, strEq> lociSnpListHash;
  hash_set<const char*, __gnu_cxx::hash<const char*>, strEq>::iterator it;

  // Read the --loci file.  It is a whitespace-separated token stream: a token
  // whose first five characters are "locus" starts a new locus, and every other
  // token is a SNP id belonging to the most recently named locus (SNPs seen
  // before any locus token are assigned to "locus0").
  // Fills, in file order:
  //   orgList         - every token as read
  //   lociSnpList     - the SNP ids
  //   lociNameList    - the locus name of the SNP at the same position in lociSnpList
  //   lociSnpListHash - lookup set over the SNP ids
  if (lociFilter) {
    ifstream locifile (lociFilename.c_str());
    if (!locifile) {
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open inclusion list file (--loci option)" << endl;
      cerr << "!" << endl;
      return (1);
    }
    cout << "# reading loci list [ " << lociFilename << " ]" << endl;
    string token;

    while (locifile >> token) {
      // list<string> stores its own copy, so no heap buffer is needed here
      // (the previous new char[] per token was never freed).
      // Going through c_str() keeps the old C-string semantics exactly.
      orgList.push_back(token.c_str());
    }
    locifile.close();

    string strTmp = "locus0";
    for (oit = orgList.begin(); oit != orgList.end(); ++oit) {
      if (strncmp((*oit).c_str(),"locus",5) == 0) {
	// a locus header: remember it for the SNPs that follow
	strTmp = (*oit).c_str();
      } else {
	// a SNP id: record it together with the locus it belongs to
	lociSnpList.push_back((*oit).c_str());
	lociNameList.push_back(strTmp);
      }
    }

    // The hash stores raw pointers into the strings owned by lociSnpList, so
    // lociSnpList must not be modified for as long as lociSnpListHash is in use.
    for (oit = lociSnpList.begin(); oit != lociSnpList.end(); ++oit) {
      lociSnpListHash.insert((*oit).c_str());
    }

    cout << "# read " << lociSnpList.size() << " markers for inclusion" << endl;

  }
    
  cout << "# opening output file [ " << io.outputFilename << " ]" << endl;
  ofstream outputfile (io.outputFilename.c_str());
  if (!outputfile) {
    cerr << "!" << endl;
    cerr << "! fatal error : cannot open output file" << endl;
    cerr << "!" << endl;
    return (1);
  }
  
  // print header for output file
  
  // general header, repeating first five columns of input file, plus expected genotype counts
  outputfile << "id1 id2 pos alleleA alleleB meanAA meanAB meanBB";
  
  // analysis-specific parts of header

  if (method.MAF) outputfile << " MAF";
  if (method.rSqHat) outputfile << " rSqHat";
  if (method.alphaHat) outputfile << " alphaHat";
  if (method.Hardy) outputfile << " pHW";

  if (method.test) { 
    if (method.normal) outputfile << " normal.magic.beta normal.magic.se normal.magic.p";
    if (method.mix) outputfile << " mixture.magic.beta mixture.magic.se mixture.magic.p";
    if (method.qqnormal) outputfile << " qqnormal.magic.beta qqnormal.magic.se qqnormal.magic.p";
  }

  if (method.call) {
    if (method.normal) outputfile << " call.ncalled normal.call.beta normal.call.se normal.call.p";
    if (method.qqnormal) {
      if (!method.normal) outputfile << " call.ncalled";
      outputfile << " qqnormal.call.beta qqnormal.call.se qqnormal.call.p";
    }
  }
  
  if (method.mean) {
    if (method.normal) {
      outputfile << " normal.mean.beta normal.mean.se normal.mean.p";
      if (permVerbose) {
	for (int p = 1; p <= numPermutations; ++p) outputfile << " normal.mean.p" << p;
      } else if (BesagClifford) {
	outputfile << " normal.mean.pPerm";
      }
    }
    if (method.mix) outputfile << " mixture.mean.nits mixture.mean.beta mixture.mean.se mixture.mean.p";      
    if (method.qqnormal) outputfile << " qqnormal.mean.beta qqnormal.mean.se qqnormal.mean.p";

    if (method.interaction) {
       if (method.robust) outputfile << " snp.beta snp.se snp.p interaction.beta interaction.se interaction.p cov.snp.interaction";
       else outputfile << " interaction.beta interaction.se interaction.p"; 
       }
  }



  if (method.score) {
    if (method.normal) outputfile << " normal.score.beta normal.score.se normal.score.p normal.score.info";
    if (method.mix) outputfile << " mixture.score.beta mixture.score.se mixture.score.p";
    if (method.qqnormal) outputfile << " qqnormal.score.beta qqnormal.score.se qqnormal.score.p";
  }

  if (method.ML) {
    if (method.normal) outputfile << " normal.ML.nits normal.ML.beta normal.ML.se normal.ML.p";
    if (method.mix) outputfile << " mixture.ML.nits mixture.ML.beta mixture.ML.se mixture.ML.p";
    if (method.qqnormal) outputfile << " qqnormal.ML.nits qqnormal.ML.beta qqnormal.ML.se qqnormal.ML.p";
  }

  if (method.MCMC) {
    if (method.normal) outputfile << " normal.MCMC.nsims normal.MCMC.beta normal.MCMC.se normal.MCMC.p";
  }

  if (method.binary) outputfile << " binary.mean.nits binary.mean.beta binary.mean.se binary.mean.p";

  if (method.poe) {
    if (method.normal) outputfile << " normal.poe.beta normal.poe.se normal.poe.p varAA varAB varBB";
    if (method.qqnormal) outputfile << " qqnormal.poe.beta qqnormal.poe.se qqnormal.poe.p qqvarAA qqvarAB qqvarBB";
  }

  if (method.multiSNP) {
    if (method.normal) outputfile << " normal.multiSNP.beta normal.multiSNP.se normal.multiSNP.p normal.multiSNP.explVar normal.multiSNP.explVarAdj normal.multiSNP.locusID";
    if (method.qqnormal) outputfile << " qqnormal.multiSNP.beta qqnormal.multiSNP.se qqnormal.multiSNP.p qqnormal.multiSNP.explVar qqnormal.multiSNP.explVarAdj qqnormal.multiSNP.locusID";
  }
 
  //  if (method.Pettitt) {
  //  outputfile << " beta.Pettitt se.Pettitt p.Pettitt";
  //}    
  
  outputfile << endl;
  
  ofstream copyfile;
  if (method.copy) {
    if (method.copycalls) {
      cout << "# copying called genotypes to [ " << method.copyFilename << " ]" << endl;
      cout << "#     using calling threshold [ " << method.callThresh << " ]" << endl;
    } else if (method.copydose) {
      cout << "# copying genotype dosages to [ " << method.copyFilename << " ]" << endl;
    } else {
      cout << "# copying genotype probabilities to [ " << method.copyFilename << " ]" << endl;
    }
    cout << "# copying for " << n << " individuals with non-missing phenotypes ONLY" << endl;
    copyfile.open(method.copyFilename.c_str());
    if (!copyfile) {
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open copy output file" << endl;
      cerr << "!" << endl;
      return (1);
    }
  }

  
  // variables read from file
  string id1, id2, pos, alleleA, alleleB;
  // temp variables for current individual
  double thisp0, thisp1, thisp2;
  
  // probabilities for genotypes 0, 1 and 2
  vector<double> p0 (n);
  vector<double> p1 (n);
  vector<double> p2 (n);
  
  // **cumulative** probabilities for genotype 0 and 1
  vector<double> bigP1 (n);
  
  // called genotypes, like meanG but called, -1 means unknown
  vector<int> callG (n);

  list<bool>::iterator indiv;
  
  double sump;

  long int snpnumTotal = 0;
  long int snpnum = 0;
  long int multiSNPnum = lociSnpList.size();
  
  int nCalled; // number of called genotypes for method.call
  int nUsed; // number of simulation or MCMC iterations used
  // (i.e. not discarded due to impropriety or burnin resp.)
  int nIts; // number of iterations of EM algorithm
  //double sqrtRhat;
  
  double ebeta; // posterior mean
  double sdbeta; // posterior sd
  
  double sumGG; // ditto


  clock_t last_time, this_time;
  double total_seconds = 0.;
  last_time = clock();
  
  if (method.multiSNP) {
    if (!lociFilter) {
      cerr << "!" << endl;
      cerr << "! fatal error : --loci option is needed when --method-multiSNP is invoked" << endl;
      cerr << "!" << endl;
    } else {

    ////////////////////////////////
    //
    // For multi-SNP method the whole file needs to be read
    //
    ///////////////////////////////

  vector<double> Q (n*multiSNPnum);
  vector<string> snpid1 (multiSNPnum);
  vector<string> snpid2 (multiSNPnum);
  vector<string> snppos (multiSNPnum);
  vector<int> snpLocusNum (multiSNPnum);
  vector<string> aA (multiSNPnum);
  vector<string> aB (multiSNPnum);
  vector<string> locusId (multiSNPnum);
  vector<string> locusSnp (multiSNPnum);
  vector<int> locusIdNum (multiSNPnum);
  vector<double> mAA (multiSNPnum);
  vector<double> mAB (multiSNPnum);
  vector<double> mBB (multiSNPnum);
  int sn;

  int i = 0;
  for (oit = lociNameList.begin(); oit != lociNameList.end(); ++oit) {
    locusId[i] = (*oit).c_str();
    //cout << locusId[i] << endl;
    ++i;
  }

  i = 0;
  for (oit = lociSnpList.begin(); oit != lociSnpList.end(); ++oit) {
    locusSnp[i] = (*oit).c_str();
    //cout << locusSnp[i] << endl;
    ++i;
  }
  
  locusIdNum[0] = 1;
  for (int i = 1; i < multiSNPnum ; ++i) {
    if (!locusId[i-1].compare(locusId[i])) {
      locusIdNum[i] = locusIdNum[i-1];
    } else {
      locusIdNum[i] = locusIdNum[i-1]+1;
    }
   }
  int locusNum = locusIdNum[multiSNPnum-1];

  while (!io.genotypeFilelist->empty()) {
    io.genotypeFilename = io.genotypeFilelist->front();
    io.genotypeFilelist->pop_front();
    
    cout << "# processing genotype file [ " << io.genotypeFilename << " ]" << endl;
    snpnum = 0;
    // ifstream genotypefile (io.genotypeFilename.c_str());

if(bgen == false) {
    igzstream genotypefile (io.genotypeFilename.c_str());	//TODO This is where to change from. I think I'll just generate a new class that returns similar things (should be a child class of gzstream so I can keep both functionality

    if (!genotypefile) {
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open genotype file" << endl;
      cerr << "!" << endl;
      return (1);
    }
    while (genotypefile >> id1 >> id2 >> pos >> alleleA >> alleleB) {
      ++snpnumTotal;
      ++snpnum;
      if (snpnumTotal % 20 == 0) {
	this_time = clock();
	double elapse_seconds = (double)(this_time - last_time)/CLOCKS_PER_SEC;
	total_seconds += elapse_seconds;
	last_time = this_time;
	if (io.progress) {
	  cout << "# processing line [ " << id1 << " " << id2 << " " << pos << " ] number " << snpnumTotal;
	  cout << " at " << fprec(elapse_seconds*125./9.,2) << " hours/million SNPs" << "       " << "\r";
	  fflush(stdout); 	
	}
      }

      // idea here is to read the geno file as quickly as possible,
      // deferring analysis specific calculations to subsequent blocks of code
   
      {
	int i = 0; // local to this block
	for (indiv = phenotypeokay.begin(); indiv != phenotypeokay.end(); ++indiv) {
	  if (genotypefile >> thisp0 >> thisp1 >> thisp2) {
	    if (*indiv) {
	      p0[i] = thisp0;
	      p1[i] = thisp1;
	      p2[i] = thisp2;
	      ++i;
	    }
	    // else (implies !*indiv) so do nothing
	  } else { // could not read three doubles from file
	    cerr << "!" << endl;
	    cerr << "! fatal error: unexpected failure reading file [ " << io.genotypeFilename << " ]" << endl;
	    cerr << "!              part way through line " << snpnumTotal << " snp " << id2 << endl;
	    cerr << "! TEXT" << endl;
	    EXIT_WITH_ERROR(1, "");
	  }
	}
      }
      
      // Keep only SNPs named in the --loci list; all other lines are read and dropped.
      if (lociFilter && lociSnpListHash.find(id2.c_str()) == lociSnpListHash.end()) {
	snpnum -= 1; // undo the ++snpnum done when this line's header was read
	continue; // only keep SNPs that appear in at least one locus
      } else {
	// Q, snpid1, snpid2, snppos, aA, aB, mAA, mAB and mBB were all sized for
	// multiSNPnum SNPs.  A genotype file that repeats listed ids can supply
	// more matching lines than that; stop rather than write past the end.
	if (snpnum > multiSNPnum) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: file [ " << io.genotypeFilename << " ] contains more lines matching the --loci list" << endl;
	  cerr << "!              than the [ " << multiSNPnum << " ] markers read from it, at line " << snpnumTotal << " snp " << id2 << endl;
	  cerr << "!" << endl;
	  EXIT_WITH_ERROR(1, "");
	}
	snpid1[snpnum-1] = id1;
	snpid2[snpnum-1] = id2;
	snppos[snpnum-1] = pos;
	aA[snpnum-1] = alleleA;
	aB[snpnum-1] = alleleB;
	// store the expected dosage p1 + 2*p2 per individual, and accumulate the
	// expected genotype counts over all individuals
	double tmpAA = 0;
	double tmpAB = 0;
	double tmpBB = 0;
	for (int i = 0; i < n; ++i) {
	  Q[n*(snpnum-1)+i] = p1[i]+2*p2[i];
	  tmpAA += p0[i];
	  tmpAB += p1[i];
	  tmpBB += p2[i];
	}
	mAA[snpnum-1] = tmpAA;
	mAB[snpnum-1] = tmpAB;
	mBB[snpnum-1] = tmpBB;
      }
    }
    genotypefile.close();
    if (!genotypefile.eof()) {
      cerr << "!" << endl;
      cerr << "! fatal error: unexpected termination of [ " << io.genotypeFilename << " ]" << endl;
      cerr << "!" << endl;
      EXIT_WITH_ERROR(1, "");
    }
} else { // --bgen
    bgenstream genotypefile (io.genotypeFilename.c_str());	//TODO This is where to change from. I think I'll just generate a new class that returns similar things (should be a child class of gzstream so I can keep both functionality

    if (!genotypefile.input) {
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open genotype file" << endl;
      cerr << "!" << endl;
      return (1);
    }
    while (1) {
      genotypefile.read_variant_id();
      if(!genotypefile.input) { break; }
      id1=genotypefile.get_vid();
      id2=genotypefile.get_rsid();
      pos=genotypefile.get_pos_string();
      alleleA=genotypefile.get_aA();
      alleleB=genotypefile.get_aB();
      if(!genotypefile.input) { break; } // is this check redundant?

      ++snpnumTotal;
      ++snpnum;
      if (snpnumTotal % 20 == 0) {
	this_time = clock();
	double elapse_seconds = (double)(this_time - last_time)/CLOCKS_PER_SEC;
	total_seconds += elapse_seconds;
	last_time = this_time;
	if (io.progress) {
	  cout << "# processing line [ " << id1 << " " << id2 << " " << pos << " ] number " << snpnumTotal;
	  cout << " at " << fprec(elapse_seconds*125./9.,2) << " hours/million SNPs" << "       " << "\r";
	  fflush(stdout);
	}
      }

      // idea here is to read the geno file as quickly as possible,
      // deferring analysis specific calculations to subsequent blocks of code

      {
	// Decode the genotype probabilities of the current variant and copy the
	// triples of the individuals with a usable phenotype into p0/p1/p2.
	if(!genotypefile.input) { EXIT_WITH_ERROR(10, "input not available"); }
	genotypefile.read_variant_probabilities();
	int i = 0; // index into p0/p1/p2: counts individuals that are kept
	int iindiv = 0; // index of the individual within the file, kept or not
	for (indiv = phenotypeokay.begin(); indiv != phenotypeokay.end(); ++indiv) {
	  // probs[3*iindiv] .. probs[3*iindiv+2] are read below, so the whole
	  // triple for *this file individual* must be present.  (The test used
	  // to be on 3*i+2, which is smaller and let reads run past the end.)
	  if (genotypefile.probs.size() >= (unsigned int)(3*iindiv+3)) {
	    if (*indiv) {
	      p0[i] = genotypefile.probs[3*iindiv];
	      p1[i] = genotypefile.probs[3*iindiv+1];
	      p2[i] = genotypefile.probs[3*iindiv+2];
	      ++i;
	    }
		++iindiv;
	    // else (implies !*indiv) so do nothing
	  } else { // fewer probabilities available than individuals expected
	    cerr << "!" << endl;
	    cerr << "! fatal error: unexpected failure reading file [ " << io.genotypeFilename << " ]" << endl;
	    cerr << "!              part way through line " << snpnum << " snp " << id2 << endl;
	    EXIT_WITH_ERROR(1, "");
	  }
	}
      }

      // Keep only variants named in the --loci list; all others are read and dropped.
      if (lociFilter && lociSnpListHash.find(id2.c_str()) == lociSnpListHash.end()) {
	snpnum -= 1; // undo the ++snpnum done when this variant's ids were read
	continue; // only keep SNPs that appear in at least one locus
      } else {
	// Q, snpid1, snpid2, snppos, aA, aB, mAA, mAB and mBB were all sized for
	// multiSNPnum SNPs.  A genotype file that repeats listed ids can supply
	// more matching variants than that; stop rather than write past the end.
	if (snpnum > multiSNPnum) {
	  cerr << "!" << endl;
	  cerr << "! fatal error: file [ " << io.genotypeFilename << " ] contains more variants matching the --loci list" << endl;
	  cerr << "!              than the [ " << multiSNPnum << " ] markers read from it, at variant " << snpnumTotal << " snp " << id2 << endl;
	  cerr << "!" << endl;
	  EXIT_WITH_ERROR(1, "");
	}
	snpid1[snpnum-1] = id1;
	snpid2[snpnum-1] = id2;
	snppos[snpnum-1] = pos;
	aA[snpnum-1] = alleleA;
	aB[snpnum-1] = alleleB;
	// store the expected dosage p1 + 2*p2 per individual, and accumulate the
	// expected genotype counts over all individuals
	double tmpAA = 0;
	double tmpAB = 0;
	double tmpBB = 0;
	for (int i = 0; i < n; ++i) {
	  Q[n*(snpnum-1)+i] = p1[i]+2*p2[i];
	  tmpAA += p0[i];
	  tmpAB += p1[i];
	  tmpBB += p2[i];
	}
	mAA[snpnum-1] = tmpAA;
	mAB[snpnum-1] = tmpAB;
	mBB[snpnum-1] = tmpBB;
      }
    }
    genotypefile.close();
    if (!genotypefile.input.eof()) {
      cerr << "!" << endl;
      cerr << "! fatal error: unexpected termination of [ " << io.genotypeFilename << " ]" << endl;
      cerr << "!" << endl;
      EXIT_WITH_ERROR(1, "");
    }
}

    cout << "# finished reading genotype file [ " << io.genotypeFilename << " ] with " << snpnumTotal << " SNPs" << "        " << endl;
    if (snpnumTotal == 0) {
      cout << "! warning : [ " << io.genotypeFilename << " ] was empty, or does not exist" << endl;
    } else {
      //      snpnumTotal += snpnum;
    }
  }  // end while
    // all genotypes are read in and stored in Q

  string locusName;
  int colIx;
  set<string> dupCatcher;
  set<string>::iterator setit;
  int nLoc;
  int nLocNew;

  for (int locusCounter = 1; locusCounter <= locusNum; ++locusCounter) {
    colIx = 0;
    vector<double> Qred (n*snpnum);
    vector<string> snpid1red (snpnum);
    vector<string> snpid2red (snpnum);
    vector<string> snpposred (snpnum);
    vector<string> aAred (snpnum);
    vector<string> aBred (snpnum);
    vector<double> mAAred (snpnum);
    vector<double> mABred (snpnum);
    vector<double> mBBred (snpnum);

    nLoc = 0;
    nLocNew = 0;
    dupCatcher.clear();

    for (int k = 0; k < multiSNPnum; ++k) {

      if (locusIdNum[k]==locusCounter) {
	for (int j = 0; j < snpnum; ++j) {
	  if (locusSnp[k].compare(snpid2[j]) == 0) {
	    dupCatcher.insert(snpid2[j]);
	    nLocNew = dupCatcher.size();

	    if (nLocNew > nLoc) {
	      locusName = locusId[k];
	      snpid1red[colIx] = snpid1[j];
	      snpid2red[colIx] = snpid2[j];
	      snpposred[colIx] = snppos[j];
	      aAred[colIx] = aA[j];
	      aBred[colIx] = aB[j];
	      mAAred[colIx] = mAA[j];
	      mABred[colIx] = mAB[j];
	      mBBred[colIx] = mBB[j];

	      for (int jj = 0; jj < n; ++jj) {
		Qred[n*colIx+jj] = Q[n*j+jj];
	      }
	      ++colIx;
	      ++nLoc;
	    } else {
	      cout << "! warning : Multiple instance of SNP " << snpid2[j] << " found at " << locusName  << endl;
	      cout << "!         : only one copy kept" << endl;
	    }
	  }
	}
      }
    }
  

    sn = colIx;
    Qred.resize(n*sn);
    snpid1red.resize(sn);
    snpid2red.resize(sn);
    snpposred.resize(sn);
    aAred.resize(sn);
    aBred.resize(sn);
    mAAred.resize(sn);
    mABred.resize(sn);
    mBBred.resize(sn);

    vector<double> beta (sn);
    vector<double> se (sn);
    vector<double> pval (sn);
    double explVar = 0;
    double explVarAdj = 0;
    // Multi-SNP regression of the untransformed residual phenotype on all SNPs
    // of this locus; writes one output row per SNP.  Every row carries the six
    // analysis columns announced in the header:
    //   beta se p explVar explVarAdj locusID
    if (method.normal) {
      if (method_multiSNP(*y[0], Qred, n, sn, priorDF, beta, se, pval, explVar,explVarAdj)) {
	for (int i = 0; i < sn; ++i) {
	  // diagnostic only: estimate and se are usable but the p-value is NaN,
	  // so print them to stdout together with a recomputed t-test p-value
	  if (!std::isnan(beta[i]) && !std::isnan(se[i]) && std::isnan(pval[i])) {
	    double df = n+priorDF-sn;
	    double pv = 2.*pt(-fabs(beta[i])/(se[i]),df , 1, 0);
	    cout << beta[i] << " " << se[i] << " " << pval[i] << " " << pv << " " << df << endl;
	  }
	  // a zero beta, se or p, or a NaN p, is reported as NA for this SNP
	  if (beta[i]==0 || se[i]==0 || pval[i]==0 || std::isnan(pval[i])) {
	    outputfile <<  snpid1red[i] << " " << snpid2red[i] << " " << snpposred[i] << " " << aAred[i] << " " << aBred[i] << " " << mAAred[i]  << " " << mABred[i]  << " " << mBBred[i] << " NA NA NA " << explVar << " " << explVarAdj << " " << locusName << endl;
	  } else {
	    outputfile << snpid1red[i] << " " << snpid2red[i] << " " << snpposred[i] << " " << aAred[i] << " " << aBred[i] << " " << mAAred[i]  << " " << mABred[i]  << " " << mBBred[i] << " " << beta[i] << " " << se[i] << " " << pval[i] << " " << explVar << " " << explVarAdj << " " << locusName << endl;
	  }
	}
      } else {
	// the fit failed: five NA fields (beta, se, p, explVar, explVarAdj) so
	// that locusID stays in its own column, matching the header
	for (int i = 0; i < sn; ++i) {
	  outputfile <<  snpid1red[i] << " " << snpid2red[i] << " " << snpposred[i] << " " << aAred[i] << " " << aBred[i] << " " << mAAred[i]  << " " << mABred[i]  << " " << mBBred[i] << " NA NA NA NA NA " << locusName << endl;
	}
      }
    }

    // Same multi-SNP regression, on the quantile-normalised phenotype yNorm;
    // writes one output row per SNP.  Every row carries the six analysis
    // columns announced in the header:
    //   beta se p explVar explVarAdj locusID
    if (method.qqnormal) {
      if (method_multiSNP(yNorm, Qred, n, sn, priorDF, beta, se, pval, explVar,explVarAdj)) {
	for (int i = 0; i < sn; ++i) {
	  // diagnostic only: estimate and se are usable but the p-value is NaN,
	  // so print them to stdout together with a recomputed t-test p-value
	  if (!std::isnan(beta[i]) && !std::isnan(se[i]) && std::isnan(pval[i])) {
	    double df = n+priorDF-sn;
	    double pv = 2.*pt(-fabs(beta[i])/(se[i]),df , 1, 0);
	    cout << beta[i] << " " << se[i] << " " << pval[i] << " " << pv << " " << df << endl;
	  }
	  // a zero beta, se or p, or a NaN p, is reported as NA for this SNP
	  if (beta[i]==0 || se[i]==0 || pval[i]==0 || std::isnan(pval[i])) {
	    outputfile <<  snpid1red[i] << " " << snpid2red[i] << " " << snpposred[i] << " " << aAred[i] << " " << aBred[i] << " " << mAAred[i]  << " " << mABred[i]  << " " << mBBred[i] << " NA NA NA " << explVar << " " << explVarAdj << " " << locusName << endl;
	  } else {
	    outputfile << snpid1red[i] << " " << snpid2red[i] << " " << snpposred[i] << " " << aAred[i] << " " << aBred[i] << " " << mAAred[i]  << " " << mABred[i]  << " " << mBBred[i] << " " << beta[i] << " " << se[i] << " " << pval[i] << " " << explVar << " " << explVarAdj << " " << locusName << endl;
	  }
	}
      } else {
	// the fit failed: five NA fields (beta, se, p, explVar, explVarAdj) so
	// that locusID stays in its own column, matching the header
	for (int i = 0; i < sn; ++i) {
	  outputfile <<  snpid1red[i] << " " << snpid2red[i] << " " << snpposred[i] << " " << aAred[i] << " " << aBred[i] << " " << mAAred[i]  << " " << mABred[i]  << " " << mBBred[i] << " NA NA NA NA NA " << locusName << endl;
	}
      }
    }
  }

  outputfile.close();
  this_time = clock();
  total_seconds += (double)(this_time - last_time)/CLOCKS_PER_SEC;
  
  cout << "# finished processing all genotype files with total " << snpnumTotal << " SNPs" << endl;
  cout << "# average speed " << fprec(total_seconds*2500./9./(double)snpnumTotal,3) << " hours/million SNPs";
  cout << " or " << fprec((double)snpnumTotal/total_seconds,2) << " SNPs/second" << endl;

  cout << "#" << endl;
    }
  } else { 
    //////////////////////////
    //
    // single SNP associations can be read in and processed line-by-line
    //
    ////////////////////////
  while (!io.genotypeFilelist->empty()) {
    io.genotypeFilename = io.genotypeFilelist->front();
    io.genotypeFilelist->pop_front();
    
    cout << "# processing genotype file [ " << io.genotypeFilename << " ]" << endl;
    snpnum = 0;

    // ifstream genotypefile (io.genotypeFilename.c_str());
if(bgen){
    bgenstream genotypefile (io.genotypeFilename.c_str());

    while (1) {
		if(!genotypefile.input) { EXIT_WITH_ERROR(10, "input not available"); }

		genotypefile.read_variant_id();
		if(!genotypefile.input) { break; }
		id1=genotypefile.get_vid();
		id2=genotypefile.get_rsid();
		pos=genotypefile.get_pos_string();
		alleleA=genotypefile.get_aA();
		alleleB=genotypefile.get_aB();
		if(!genotypefile.input) { break; } // is this check redundant?

      ++snpnum;
      if (snpnum % 20 == 0) {
	this_time = clock();
	double elapse_seconds = (double)(this_time - last_time)/CLOCKS_PER_SEC;
	total_seconds += elapse_seconds;
	last_time = this_time;
	if (io.progress) {
	  cout << "# processing snp [ " << id1 << " " << id2 << " " << pos << " ] number " << snpnum;
	  cout << " at " << fprec(elapse_seconds*125./9.,2) << " hours/million SNPs" << "       " << "\r";
	  fflush(stdout); 	
	}
      }

      double eSumgg;

      // idea here is to read the geno file as quickly as possible,
      // deferring analysis specific calculations to subsequent blocks of code
   
      {
	// Decode the genotype probabilities of the current variant and copy the
	// triples of the individuals with a usable phenotype into p0/p1/p2.
	if(!genotypefile.input) { EXIT_WITH_ERROR(10, "input not available"); }
	genotypefile.read_variant_probabilities();
	int i = 0; // index into p0/p1/p2: counts individuals that are kept
	int iindiv = 0; // index of the individual within the file, kept or not
	for (indiv = phenotypeokay.begin(); indiv != phenotypeokay.end(); ++indiv) {
	  // probs[3*iindiv] .. probs[3*iindiv+2] are read below, so the whole
	  // triple for *this file individual* must be present.  (The test used
	  // to be on 3*i+2, which is smaller and let reads run past the end.)
	  if (genotypefile.probs.size() >= (unsigned int)(3*iindiv+3)) {
	    if (*indiv) {
	      p0[i] = genotypefile.probs[3*iindiv];
	      p1[i] = genotypefile.probs[3*iindiv+1];
	      p2[i] = genotypefile.probs[3*iindiv+2];
	      ++i;
	    }
		++iindiv;
	    // else (implies !*indiv) so do nothing
	  } else { // fewer probabilities available than individuals expected
	    cerr << "!" << endl;
	    cerr << "! fatal error: unexpected failure reading file [ " << io.genotypeFilename << " ]" << endl;
	    cerr << "!              part way through line " << snpnum << " snp " << id2 << endl;
	    EXIT_WITH_ERROR(1, "");
	  }
	}
      }

      if (onlyFilter && onlyList.find(id2.c_str()) == onlyList.end()) continue;

      // if needed, genotype calls are precomputed *before* renorming probabilities and kludging
      if (method.copycalls || method.Hardy || method.call) {
	for (int i = 0; i < n; ++i) {
	  if (p0[i] >= method.callThresh && p0[i] > p1[i] && p0[i] > p2[i]) {
	    callG[i] = 0;
	  } else if (p1[i] >= method.callThresh && p1[i] > p0[i] && p1[i] > p2[i]) {
	    callG[i] = 1;
	  } else if (p2[i] >= method.callThresh && p2[i] > p0[i] && p2[i] > p1[i]) {
	    callG[i] = 2;
	  } else {
	    callG[i] = -1;
	  }
	}
      }

      // if needed dosages are copied
      if (method.copydose) {
	for (int i= 0; i < n; ++i) {
	  meanG[i] = p1[i]+2*p2[i];
	}
      }

      // if copying probabilities, 
      // we copy the genotype probabilities before checking positiveness and renormalising

      if (method.copy) {
        if (method.copycalls) { // implies method.copycalls
	  copyfile << id1 << " " << id2 << " 0 " << pos;
	  for (int i = 0; i < n; ++i) {
	    switch (callG[i]) {
	    case 0:
	      copyfile << " " << alleleA << " " << alleleA;
	      break;
	    case 1:
	      copyfile << " " << alleleA << " " << alleleB;
	      break;
	    case 2:
	      copyfile << " " << alleleB << " " << alleleB;
	      break;
	    default:
	      copyfile << " 0 0"; 
	      break;
	    }
	  }
	  copyfile << endl;

	} else if (method.copydose) {
	  copyfile << id1 << " " << id2 << " " << pos << " " << alleleA << " " << alleleB;
	  for (int i = 0; i < n; ++i) {
	    copyfile << " " << meanG[i];
	  }
	  copyfile << endl;

	} else {
	  copyfile << id1 << " " << id2 << " " << pos << " " << alleleA << " " << alleleB;
	  for (int i = 0; i < n; ++i) {
	    copyfile << " " << p0[i] << " " << p1[i] << " " << p2[i];
	  }
	  copyfile << endl;

	}
      }

      
      // now check positiveness, renorm and compute expected genotype frequencies

      // to store expected numbers of genotypes 1 and 2
      double meanAB = 0.;
      double meanBB = 0.;

      int numZeroKludge = 0;
      for (int i = 0; i < n; ++i) {
	sump = p0[i]+p1[i]+p2[i];
	if (sump <= 0) {
	  ++numZeroKludge;
	  // cerr << "!" << endl;
	  // cerr << "! fatal error: nonpositive genotype probabilities" << endl;
	  // cerr << "!              line " << snpnum << " snp " << id2 << endl;
	  // cerr << "!" << endl;
	  // return (1);
	} else {
	  p0[i] /= sump;
	  p1[i] /= sump;
	  p2[i] /= sump;
	  bigP1[i] = thisp0+thisp1;

	  meanAB += p1[i];
	  meanBB += p2[i];
	}
      }

      if (numZeroKludge > 0) {
	double kludgeAA;
	double kludgeAB;
	double kludgeBB;
	
	if (numZeroKludge == n) {
	  kludgeAA = 0.25;
	  kludgeAB = 0.5;
	  kludgeBB = 0.25;
	} else {
	  kludgeAA = ((double)(n - numZeroKludge) - meanAB - meanBB)/(double)(n - numZeroKludge);
	  kludgeAB = (double)(meanAB)/(double)(n - numZeroKludge);
	  kludgeBB = (double)(meanBB)/(double)(n - numZeroKludge);
	}

	for (int i = 0; i < n; ++i) {
	  sump = p0[i]+p1[i]+p2[i];
	  if (sump <= 0) {
	    p0[i] = kludgeAA;
	    p1[i] = kludgeAB;
	    p2[i] = kludgeBB;
	    bigP1[i] = thisp0+thisp1;
	    meanAB += p1[i];
	    meanBB += p2[i];
	    --numZeroKludge;
	  } // else do nothing
	}

	if (numZeroKludge != 0) {
	  cerr << "!" << endl;
	  cerr << "! fatal internal error : could not kludge individual(s) with p0+p1+p2=0" << endl;
	  cerr << "!                        line " << snpnum << " snp " << id2 << endl;
	  cerr << "!" << endl;
	  return(56);
	}
      }

      // HOW TO HANDLE p0+p1+p2==0 PROPERLY ???
      // to deal properly with p0+p1+p2==0, need to 

      // reset n

      // pack down contents of p0, p1, p2, bigP1

      // pack down contents of *y[0], yNorm
      // recompute sumyy, sumyyNorm

      // pack down contents of env,
      // recompute sumenv2, sumyenv

      // we will not refit the mixture model

      // proper handling of this case conflicts with the 
      // --perm-verbose idea


      outputfile << id1 << " " << id2 << " " << pos << " " << alleleA << " " << alleleB;
      outputfile << " " << ((double)n-meanAB-meanBB) << " " << meanAB << " " << meanBB;

      if (method.MAF) {
	double freqB = (0.5*meanAB + meanBB)/(double)n;
	outputfile << " " << (freqB < 0.5 ? freqB : 1.-freqB);
      }

      if (method.rSqHat) {
	double sumX2 = 0.;
	double sumXbar2 = 0.;
	for (int i = 0; i < n; ++i) {
	  sumX2 += p1[i]+4.*p2[i];
	  sumXbar2 += pow2(p1[i]+2.*p2[i]);
	}
	double meanX = (meanAB+2.*meanBB)/(double)n;
	double rSqHat = (sumXbar2/double(n)-pow2(meanX))/(sumX2/double(n)-pow2(meanX));
	// this is fraction of variance explained by the Xbar
	outputfile << " " << rSqHat;
      }

      if (method.alphaHat) {
	outputfile << " " << compute_alphaHat(p0, p1, p2, meanAB, meanBB, n);
      }

      if (method.Hardy) {
	int callAA = 0;
	int callAB = 0;
	int callBB = 0;
	for (int i = 0; i < n; ++i) {
	  switch (callG[i]) {
	  case 0:
	    ++callAA;
	    break;
	  case 1:
	    ++callAB;
	    break;
	  case 2:
	    ++callBB;
	    break;
	  default:
	    break;
	  }
	}
	if (callAA > 0 || callAB > 0 || callBB > 0) {
	  outputfile << " " << SNPHWE(callAB, callAA, callBB);
	} else {
	  outputfile << " NA";
	}
      }

      eSumgg = meanAB + 4.*meanBB - pow2(meanAB + 2.*meanBB) / n + ppbeta;

      if (method.test) {
	random_shuffle(iPerm.begin(), iPerm.end(), randInt);
	synthetic (y[0], *y[1], &meanG, p0, bigP1, n, iPerm, method.testBeta, &sumyy, &sumGG);
	// this replaces the vector pointed to by y[0], with a vector
	// constructed from *y[1] (which is a random permutation of
	// *y[0]) by adding effects of simulated genotypes, with allelic
	// effect method.testBeta.  The new vector is centered and sumyy
	// is updated appropriately
	
	for (int k = 0; k < method.mixBigK; ++k) {
	  pi[k] = piRef[k];
	  mu[k] = muRef[k];
	  sigma[k] = sigmaRef[k];
	}
	if (method.mix) {
	  if (refit_mixture (*y[0], n, sumyy, method.mixBigK, method.mixCentered, &pi, &mu, &sigma, false) &&
	      method_mix_null (*y[0], n, method.mixBigK, pi, mu, sigma, &foonits, &alphaMLE, &foodiff, false)) {
	    // okay
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error : could not (re)fit mixture model to simulated phenotype" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}
	if (!method.mix) {
	  if (method_mean(*y[0], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	} else { // implies method.mix
	  if (method_mean_mix(*y[0], meanG, n, method.mixBigK, pi, mu, sigma, alphaMLE,
			      &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se;
	    outputfile << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
      }
    
      if (method.call) {
	if (method.normal) {
	  if (method_call(*y[0], callG, n, priorDF, ppbeta, method.callThresh, &nCalled, &beta, &se, &pval)) {
	    outputfile << " " << nCalled << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	if (method.qqnormal) {
	  if (method_call(yNorm, callG, n, priorDF, ppbeta, method.callThresh, &nCalled, &beta, &se, &pval)) {
	    if (!method.normal) {
	      outputfile << " " << nCalled;
	    }
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    if (!method.normal) {
	      outputfile << " NA";
	    }
	    outputfile << " NA NA NA";
	  }
	}
      }
      
      if (method.mean || method.interaction || method.binary || method.Pettitt) {
	// for possibility of using permutations, we like to be able to precompute the expected genotypes...
	precompute_mean(p1, p2, n, meanAB, meanBB, &meanG, &sumGG);
      }
      
      if (method.mean) {
	if (method.normal) {
	  if (method_mean(*y[0], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	    
	    if (permVerbose) {
	      for (int p = 1; p <= numPermutations; ++p) {
		if (method_mean(*y[p], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
		  outputfile << " " << pval;
		} else {
		  outputfile << " NA";
		}
	      }
	    } else if (BesagClifford) {
	      double ptrue = pval;
	      double BCp;
	      int g = 0; // number of successes
	      int l = 0; // number of trials
	      
	      do {
		if (l < numPermutations) {
		  if (method_mean(*y[l+1], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
		    if (pval <= ptrue) ++g;
		  } // else something bad happened, assume ! pval <= ptrue
		} else {
		  // we're using the last permutation, so shuffle it
		  random_shuffle(iPerm.begin(), iPerm.end(), randInt);
		  for (int i = 0; i < n; ++i) {
		    (*y[numPermutations])[i] = (*y[0])[iPerm[i]];
		    // no need to: (*y2[numPermutations])[i] = (*y2[0])[iPerm[i]];
		  }
		  if (method_mean(*y[numPermutations], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
		    if (pval <= ptrue) ++g;
		  }  // else something bad happened, assume ! pval <= ptrue
		}
		++l;
		if (g >= threshBesagClifford) {
		  BCp = (double)g/(double)l;
		  break;
		}
		if (l >= maxPermutations) {
		  BCp = (double)(g+1)/(double)maxPermutations;
		  break;
		}
	      } while (true);
	      outputfile << " " << BCp;
	    }
	    
	  } else { // implies first call to method_mean failed...
	    outputfile << " NA NA NA";
	    if (permVerbose) {
	      for (int p = 1; p <= numPermutations; ++p) outputfile << " NA";
	    } else if (BesagClifford) {
	      outputfile << " NA";
	    }
	  }
	}
	
	// within if(method.mean), mean genotypes are precomputed
	if (method.mix) {
	  if (method_mean_mix(*y[0], meanG, n, method.mixBigK, pi, mu, sigma, alphaMLE,
			      &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	 
	if (method.qqnormal) {
	  if (method_mean(yNorm, meanG, n, sumGG, sumyyNorm, priorDF, ppbeta, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	}
	  
	if (method.interaction) { /* inside bgen here, by the way */
	  if (method.robust) {
	    if (method_robust (yOrig, meanG, vars, vars2, varty, n, nCov, sumyyOrig, &beta, &se, &pval, &betasnp, &sesnp, &pvalsnp, &covsnpinteraction)) {
	      outputfile << " " << betasnp << " " << sesnp << " " << pvalsnp << " " << beta << " " << se << " " << pval << " " << covsnpinteraction;
	    } else {
	      outputfile << " NA NA NA NA NA NA NA";
	    }                         
          } else {
	    if (method_interaction (yOrig, meanG, vars, vars2, varty, n, nCov, sumyyOrig, &beta, &se, &pval)) {
	      outputfile << " " << beta << " " << se << " " << pval;
	    } else {
	      outputfile << " NA NA NA";
	    }
          }
	}
	
	
      } // end if (method.mean)
      
      if (method.score) {
	if (method.normal) {
	  if (method_score(ppbeta, priorDF, p1, p2, *y[0], (double)n, sumyy, &beta, &se, &pval)) {
	    double info = (sumyy/eSumgg-pow2(beta)) / ((double)n-priorDF) / pow2(se); 
	    outputfile << " " << beta << " " << se << " " << pval << " " << info;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	if (method.mix) {
	  if (method_score_mix(p1, p2, n, pcmL1, pcmL2, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	}
	if (method.qqnormal) {
	  if (method_score(ppbeta, priorDF, p1, p2, yNorm, (double)n, sumyyNorm, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	}
      }

      if (method.ML) {
	if (method.normal) {
	  if (method_em(*y[0], p0, p1, p2, n, sumyy, &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	
	if (method.mix) {
	  if (method_newton_mix(*y[0], p0, p1, p2, n, method.mixBigK, pi, mu, sigma, alphaMLE,
				&nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}

	if (method.qqnormal) {
	  if (method_em(yNorm, p0, p1, p2, n, sumyyNorm, &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
      } // end if (method.ML)
      
      if (method.MCMC) {
	if (method_mcmc(*y[0], p0, p1, p2, n, sumyy, priorDF, ppbeta, nsims, int(0.25*nsims),
			&nUsed, &ebeta, &sdbeta, &pval)) {
	  outputfile << " " << nUsed << " " << ebeta << " " << sdbeta << " " << pval;
	} else {
	  outputfile << " NA NA NA NA";
	}
      }
      
      if (method.binary) {
	if ((nIts = method_logistic(yBin, meanG, n, vars, nCov, &logipars, false, &beta, &se, &pval))) {
	  outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	} else {
	  outputfile << " NA NA NA NA";
	}
      }

      if (method.poe) {
	if (method.normal) {
	  if (method_poe(p0, p1, p2,  *y[0], (double)n, &beta, &se, &pval, &varAA, &varAB, &varBB)) {	  
	    outputfile << " " << beta << " " << se << " " << pval << " " << varAA << " " << varAB << " " << varBB;
	  } else {
	    outputfile << " NA NA NA NA NA NA";
	  }
	}
	if (method.qqnormal) {
	  if (method_poe(p0, p1, p2, yNorm, (double)n, &beta, &se, &pval, &varAA, &varAB, &varBB)) {	  
	    outputfile << " " << beta << " " << se << " " << pval << " " << varAA << " " << varAB << " " << varBB;
	  } else {
	    outputfile << " NA NA NA NA NA NA";
	  }
	}
      }


      //if (method.Pettitt) {
      //	if (method_Pettitt(yRank, meanG, n, normordBigA, normordSmallA, &beta, &se, &pval)) {
      //  outputfile << " " << beta << " " << se << " " << pval;
      //	} else {
      //	  outputfile << " NA NA NA";
      //	}
      //}
      outputfile << endl;
    
      } // end while success at reading genotype file
    
    genotypefile.close();
    if (!genotypefile.input.eof()) {
      cerr << "!" << endl;
      cerr << "! fatal error: unexpected termination of [ " << io.genotypeFilename << " ]" << endl;
      cerr << "!" << endl;
      EXIT_WITH_ERROR(1, "");
    }
    cout << "# finished processing genotype file [ " << io.genotypeFilename << " ] with " << snpnum << " SNPs" << "        " << endl;
    if (snpnum == 0) {
      cout << "! warning : [ " << io.genotypeFilename << " ] was empty, or does not exist" << endl;
    } else {
      snpnumTotal += snpnum;
    }

}else{  // NOT --bgen
    igzstream genotypefile (io.genotypeFilename.c_str());
    if (!genotypefile) {
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open genotype file" << endl;
      cerr << "!" << endl;
      return (1);
    }

    while (1) {
		if(!option__ukbb) { // normal behaviour
			genotypefile >> id1 >> id2 >> pos >> alleleA >> alleleB;
		}
		else {
			// --ukbb was specified. Ignore the second field
			string ignore_me_for_the__ukbb_option;
			(void) ignore_me_for_the__ukbb_option;
			genotypefile >> id1 >> ignore_me_for_the__ukbb_option >> id2 >> pos >> alleleA >> alleleB;
		}
		if(!genotypefile) {
			break;
		}

      ++snpnum;
      if (snpnum % 20 == 0) {
	this_time = clock();
	double elapse_seconds = (double)(this_time - last_time)/CLOCKS_PER_SEC;
	total_seconds += elapse_seconds;
	last_time = this_time;
	if (io.progress) {
	  cout << "# processing snp [ " << id1 << " " << id2 << " " << pos << " ] number " << snpnum;
	  cout << " at " << fprec(elapse_seconds*125./9.,2) << " hours/million SNPs" << "       " << "\r";
	  fflush(stdout); 	
	}
      }

      double eSumgg;

      // idea here is to read the geno file as quickly as possible,
      // deferring analysis specific calculations to subsequent blocks of code
   
      {
	int i = 0; // local to this block
	for (indiv = phenotypeokay.begin(); indiv != phenotypeokay.end(); ++indiv) {
	  if (genotypefile >> thisp0 >> thisp1 >> thisp2) {
	    if (*indiv) {
	      p0[i] = thisp0;
	      p1[i] = thisp1;
	      p2[i] = thisp2;
	      ++i;
	    }
	    // else (implies !*indiv) so do nothing
	  } else { // could not read three doubles from file
	    cerr << "!" << endl;
	    cerr << "! fatal error: unexpected failure reading file [ " << io.genotypeFilename << " ]" << endl;
	    cerr << "!              part way through line " << snpnum << " snp " << id2 << endl;
	    cerr << "! UKBB" << endl;
	    EXIT_WITH_ERROR(1, "");
	  }
	}
      }

      if (onlyFilter && onlyList.find(id2.c_str()) == onlyList.end()) continue;

      // if needed, genotype calls are precomputed *before* renorming probabilities and kludging
      if (method.copycalls || method.Hardy || method.call) {
	for (int i = 0; i < n; ++i) {
	  if (p0[i] >= method.callThresh && p0[i] > p1[i] && p0[i] > p2[i]) {
	    callG[i] = 0;
	  } else if (p1[i] >= method.callThresh && p1[i] > p0[i] && p1[i] > p2[i]) {
	    callG[i] = 1;
	  } else if (p2[i] >= method.callThresh && p2[i] > p0[i] && p2[i] > p1[i]) {
	    callG[i] = 2;
	  } else {
	    callG[i] = -1;
	  }
	}
      }

      // if needed dosages are copied
      if (method.copydose) {
	for (int i= 0; i < n; ++i) {
	  meanG[i] = p1[i]+2*p2[i];
	}
      }

      // if copying probabilities, 
      // we copy the genotype probabilities before checking positiveness and renormalising

      if (method.copy) {
        if (method.copycalls) { // implies method.copycalls
	  copyfile << id1 << " " << id2 << " 0 " << pos;
	  for (int i = 0; i < n; ++i) {
	    switch (callG[i]) {
	    case 0:
	      copyfile << " " << alleleA << " " << alleleA;
	      break;
	    case 1:
	      copyfile << " " << alleleA << " " << alleleB;
	      break;
	    case 2:
	      copyfile << " " << alleleB << " " << alleleB;
	      break;
	    default:
	      copyfile << " 0 0"; 
	      break;
	    }
	  }
	  copyfile << endl;

	} else if (method.copydose) {
	  copyfile << id1 << " " << id2 << " " << pos << " " << alleleA << " " << alleleB;
	  for (int i = 0; i < n; ++i) {
	    copyfile << " " << meanG[i];
	  }
	  copyfile << endl;

	} else {
	  copyfile << id1 << " " << id2 << " " << pos << " " << alleleA << " " << alleleB;
	  for (int i = 0; i < n; ++i) {
	    copyfile << " " << p0[i] << " " << p1[i] << " " << p2[i];
	  }
	  copyfile << endl;

	}
      }

      
      // now check positiveness, renorm and compute expected genotype frequencies

      // to store expected numbers of genotypes 1 and 2
      double meanAB = 0.;
      double meanBB = 0.;

      int numZeroKludge = 0;
      for (int i = 0; i < n; ++i) {
	sump = p0[i]+p1[i]+p2[i];
	if (sump <= 0) {
	  ++numZeroKludge;
	  // cerr << "!" << endl;
	  // cerr << "! fatal error: nonpositive genotype probabilities" << endl;
	  // cerr << "!              line " << snpnum << " snp " << id2 << endl;
	  // cerr << "!" << endl;
	  // return (1);
	} else {
	  p0[i] /= sump;
	  p1[i] /= sump;
	  p2[i] /= sump;
	  bigP1[i] = thisp0+thisp1;

	  meanAB += p1[i];
	  meanBB += p2[i];
	}
      }

      if (numZeroKludge > 0) {
	double kludgeAA;
	double kludgeAB;
	double kludgeBB;
	
	if (numZeroKludge == n) {
	  kludgeAA = 0.25;
	  kludgeAB = 0.5;
	  kludgeBB = 0.25;
	} else {
	  kludgeAA = ((double)(n - numZeroKludge) - meanAB - meanBB)/(double)(n - numZeroKludge);
	  kludgeAB = (double)(meanAB)/(double)(n - numZeroKludge);
	  kludgeBB = (double)(meanBB)/(double)(n - numZeroKludge);
	}

	for (int i = 0; i < n; ++i) {
	  sump = p0[i]+p1[i]+p2[i];
	  if (sump <= 0) {
	    p0[i] = kludgeAA;
	    p1[i] = kludgeAB;
	    p2[i] = kludgeBB;
	    bigP1[i] = thisp0+thisp1;
	    meanAB += p1[i];
	    meanBB += p2[i];
	    --numZeroKludge;
	  } // else do nothing
	}

	if (numZeroKludge != 0) {
	  cerr << "!" << endl;
	  cerr << "! fatal internal error : could not kludge individual(s) with p0+p1+p2=0" << endl;
	  cerr << "!                        line " << snpnum << " snp " << id2 << endl;
	  cerr << "!" << endl;
	  return(56);
	}
      }

      // HOW TO HANDLE p0+p1+p2==0 PROPERLY ???
      // to deal properly with p0+p1+p2==0, need to 

      // reset n

      // pack down contents of p0, p1, p2, bigP1

      // pack down contents of *y[0], yNorm
      // recompute sumyy, sumyyNorm

      // pack down contents of env,
      // recompute sumenv2, sumyenv

      // we will not refit the mixture model

      // proper handling of this case conflicts with the 
      // --perm-verbose idea


      outputfile << id1 << " " << id2 << " " << pos << " " << alleleA << " " << alleleB;
      outputfile << " " << ((double)n-meanAB-meanBB) << " " << meanAB << " " << meanBB;

      if (method.MAF) {
	double freqB = (0.5*meanAB + meanBB)/(double)n;
	outputfile << " " << (freqB < 0.5 ? freqB : 1.-freqB);
      }

      if (method.rSqHat) {
	double sumX2 = 0.;
	double sumXbar2 = 0.;
	for (int i = 0; i < n; ++i) {
	  sumX2 += p1[i]+4.*p2[i];
	  sumXbar2 += pow2(p1[i]+2.*p2[i]);
	}
	double meanX = (meanAB+2.*meanBB)/(double)n;
	double rSqHat = (sumXbar2/double(n)-pow2(meanX))/(sumX2/double(n)-pow2(meanX));
	// this is fraction of variance explained by the Xbar
	outputfile << " " << rSqHat;
      }

      if (method.alphaHat) {
	outputfile << " " << compute_alphaHat(p0, p1, p2, meanAB, meanBB, n);
      }

      if (method.Hardy) {
	int callAA = 0;
	int callAB = 0;
	int callBB = 0;
	for (int i = 0; i < n; ++i) {
	  switch (callG[i]) {
	  case 0:
	    ++callAA;
	    break;
	  case 1:
	    ++callAB;
	    break;
	  case 2:
	    ++callBB;
	    break;
	  default:
	    break;
	  }
	}
	if (callAA > 0 || callAB > 0 || callBB > 0) {
	  outputfile << " " << SNPHWE(callAB, callAA, callBB);
	} else {
	  outputfile << " NA";
	}
      }

      eSumgg = meanAB + 4.*meanBB - pow2(meanAB + 2.*meanBB) / n + ppbeta;

      if (method.test) {
	random_shuffle(iPerm.begin(), iPerm.end(), randInt);
	synthetic (y[0], *y[1], &meanG, p0, bigP1, n, iPerm, method.testBeta, &sumyy, &sumGG);
	// this replaces the vector pointed to by y[0], with a vector
	// constructed from *y[1] (which is a random permutation of
	// *y[0]) by adding effects of simulated genotypes, with allelic
	// effect method.testBeta.  The new vector is centered and sumyy
	// is updated appropriately
	
	for (int k = 0; k < method.mixBigK; ++k) {
	  pi[k] = piRef[k];
	  mu[k] = muRef[k];
	  sigma[k] = sigmaRef[k];
	}
	if (method.mix) {
	  if (refit_mixture (*y[0], n, sumyy, method.mixBigK, method.mixCentered, &pi, &mu, &sigma, false) &&
	      method_mix_null (*y[0], n, method.mixBigK, pi, mu, sigma, &foonits, &alphaMLE, &foodiff, false)) {
	    // okay
	  } else {
	    cerr << "!" << endl;
	    cerr << "! fatal error : could not (re)fit mixture model to simulated phenotype" << endl;
	    cerr << "!" << endl;
	    return(1);
	  }
	}
	if (!method.mix) {
	  if (method_mean(*y[0], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	} else { // implies method.mix
	  if (method_mean_mix(*y[0], meanG, n, method.mixBigK, pi, mu, sigma, alphaMLE,
			      &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se;
	    outputfile << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
      }
    
      if (method.call) {
	if (method.normal) {
	  if (method_call(*y[0], callG, n, priorDF, ppbeta, method.callThresh, &nCalled, &beta, &se, &pval)) {
	    outputfile << " " << nCalled << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	if (method.qqnormal) {
	  if (method_call(yNorm, callG, n, priorDF, ppbeta, method.callThresh, &nCalled, &beta, &se, &pval)) {
	    if (!method.normal) {
	      outputfile << " " << nCalled;
	    }
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    if (!method.normal) {
	      outputfile << " NA";
	    }
	    outputfile << " NA NA NA";
	  }
	}
      }
      
      if (method.mean || method.interaction || method.binary || method.Pettitt) {
	// for possibility of using permutations, we like to be able to precompute the expected genotypes...
	precompute_mean(p1, p2, n, meanAB, meanBB, &meanG, &sumGG);
      }
      
      if (method.mean) {
	if (method.normal) {
	  if (method_mean(*y[0], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	    
	    if (permVerbose) {
	      for (int p = 1; p <= numPermutations; ++p) {
		if (method_mean(*y[p], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
		  outputfile << " " << pval;
		} else {
		  outputfile << " NA";
		}
	      }
	    } else if (BesagClifford) {
	      double ptrue = pval;
	      double BCp;
	      int g = 0; // number of successes
	      int l = 0; // number of trials
	      
	      do {
		if (l < numPermutations) {
		  if (method_mean(*y[l+1], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
		    if (pval <= ptrue) ++g;
		  } // else something bad happened, assume ! pval <= ptrue
		} else {
		  // we're using the last permutation, so shuffle it
		  random_shuffle(iPerm.begin(), iPerm.end(), randInt);
		  for (int i = 0; i < n; ++i) {
		    (*y[numPermutations])[i] = (*y[0])[iPerm[i]];
		    // no need to: (*y2[numPermutations])[i] = (*y2[0])[iPerm[i]];
		  }
		  if (method_mean(*y[numPermutations], meanG, n, sumGG, sumyy, priorDF, ppbeta, &beta, &se, &pval)) {
		    if (pval <= ptrue) ++g;
		  }  // else something bad happened, assume ! pval <= ptrue
		}
		++l;
		if (g >= threshBesagClifford) {
		  BCp = (double)g/(double)l;
		  break;
		}
		if (l >= maxPermutations) {
		  BCp = (double)(g+1)/(double)maxPermutations;
		  break;
		}
	      } while (true);
	      outputfile << " " << BCp;
	    }
	    
	  } else { // implies first call to method_mean failed...
	    outputfile << " NA NA NA";
	    if (permVerbose) {
	      for (int p = 1; p <= numPermutations; ++p) outputfile << " NA";
	    } else if (BesagClifford) {
	      outputfile << " NA";
	    }
	  }
	}
	
	// within if(method.mean), mean genotypes are precomputed
	if (method.mix) {
	  if (method_mean_mix(*y[0], meanG, n, method.mixBigK, pi, mu, sigma, alphaMLE,
			      &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	 
	if (method.qqnormal) {
	  if (method_mean(yNorm, meanG, n, sumGG, sumyyNorm, priorDF, ppbeta, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	}
	  
	if (method.interaction) { /* inside !bgen here, by the way */
	  if (method.robust) {
	    if (method_robust (yOrig, meanG, vars, vars2, varty, n, nCov, sumyyOrig, &beta, &se, &pval, &betasnp, &sesnp, &pvalsnp, &covsnpinteraction)) {
	      outputfile << " " << betasnp << " " << sesnp << " " << pvalsnp << " " << beta << " " << se << " " << pval << " " << covsnpinteraction;
	    } else {
	      outputfile << " NA NA NA NA NA NA NA";
	    }                         
          } else {
	    if (method_interaction (yOrig, meanG, vars, vars2, varty, n, nCov, sumyyOrig, &beta, &se, &pval)) {
	      outputfile << " " << beta << " " << se << " " << pval;
	    } else {
	      outputfile << " NA NA NA";
	    }
          }
	}
	
	
      } // end if (method.mean)
      
      if (method.score) {
	if (method.normal) {
	  if (method_score(ppbeta, priorDF, p1, p2, *y[0], (double)n, sumyy, &beta, &se, &pval)) {
	    double info = (sumyy/eSumgg-pow2(beta)) / ((double)n-priorDF) / pow2(se); 
	    outputfile << " " << beta << " " << se << " " << pval << " " << info;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	if (method.mix) {
	  if (method_score_mix(p1, p2, n, pcmL1, pcmL2, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	}
	if (method.qqnormal) {
	  if (method_score(ppbeta, priorDF, p1, p2, yNorm, (double)n, sumyyNorm, &beta, &se, &pval)) {
	    outputfile << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA";
	  }
	}
      }

      if (method.ML) {
	if (method.normal) {
	  if (method_em(*y[0], p0, p1, p2, n, sumyy, &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
	
	if (method.mix) {
	  if (method_newton_mix(*y[0], p0, p1, p2, n, method.mixBigK, pi, mu, sigma, alphaMLE,
				&nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}

	if (method.qqnormal) {
	  if (method_em(yNorm, p0, p1, p2, n, sumyyNorm, &nIts, &beta, &se, &pval)) {
	    outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	  } else {
	    outputfile << " NA NA NA NA";
	  }
	}
      } // end if (method.ML)
      
      if (method.MCMC) {
	if (method_mcmc(*y[0], p0, p1, p2, n, sumyy, priorDF, ppbeta, nsims, int(0.25*nsims),
			&nUsed, &ebeta, &sdbeta, &pval)) {
	  outputfile << " " << nUsed << " " << ebeta << " " << sdbeta << " " << pval;
	} else {
	  outputfile << " NA NA NA NA";
	}
      }
      
      if (method.binary) {
	if ((nIts = method_logistic(yBin, meanG, n, vars, nCov, &logipars, false, &beta, &se, &pval))) {
	  outputfile << " " << nIts << " " << beta << " " << se << " " << pval;
	} else {
	  outputfile << " NA NA NA NA";
	}
      }

      if (method.poe) {
	if (method.normal) {
	  if (method_poe(p0, p1, p2,  *y[0], (double)n, &beta, &se, &pval, &varAA, &varAB, &varBB)) {	  
	    outputfile << " " << beta << " " << se << " " << pval << " " << varAA << " " << varAB << " " << varBB;
	  } else {
	    outputfile << " NA NA NA NA NA NA";
	  }
	}
	if (method.qqnormal) {
	  if (method_poe(p0, p1, p2, yNorm, (double)n, &beta, &se, &pval, &varAA, &varAB, &varBB)) {	  
	    outputfile << " " << beta << " " << se << " " << pval << " " << varAA << " " << varAB << " " << varBB;
	  } else {
	    outputfile << " NA NA NA NA NA NA";
	  }
	}
      }


      //if (method.Pettitt) {
      //	if (method_Pettitt(yRank, meanG, n, normordBigA, normordSmallA, &beta, &se, &pval)) {
      //  outputfile << " " << beta << " " << se << " " << pval;
      //	} else {
      //	  outputfile << " NA NA NA";
      //	}
      //}
      outputfile << endl;
    
      } // end while success at reading genotype file
    
    genotypefile.close();
    if (!genotypefile.eof()) {
      cerr << "!" << endl;
      cerr << "! fatal error: unexpected termination of [ " << io.genotypeFilename << " ]" << endl;
      cerr << "!" << endl;
      EXIT_WITH_ERROR(1, "");
    }
    cout << "# finished processing genotype file [ " << io.genotypeFilename << " ] with " << snpnum << " SNPs" << "        " << endl;
    if (snpnum == 0) {
      cout << "! warning : [ " << io.genotypeFilename << " ] was empty, or does not exist" << endl;
    } else {
      snpnumTotal += snpnum;
    }
} // end if bgen
  }  // end while ! io.genotypeFilelist.empty
 

  if (method.copy) {
    copyfile.close();
  }
  
  outputfile.close();

  this_time = clock();
  total_seconds += (double)(this_time - last_time)/CLOCKS_PER_SEC;
  
  cout << "# finished processing all genotype files with total " << snpnumTotal << " SNPs" << endl;
  cout << "# average speed " << fprec(total_seconds*2500./9./(double)snpnumTotal,3) << " hours/million SNPs";
  cout << " or " << fprec((double)snpnumTotal/total_seconds,2) << " SNPs/second" << endl;


  cout << "#" << endl;
  }  
} // end main. Over 2,000 lines !

  
// Read the phenotype column, and any requested covariate columns, from the
// phenotype file named in io->phenotypeFilename.
//
// io            : options; this function reads nCovar, nPheno, exclusions,
//                 exclusionsFilename, phenotypeFilename, specialOoopsLine,
//                 specialMissingCode and missingCode
// phenotype     : appended with one value per individual kept for analysis
// phenotypeokay : appended with one flag per data line (true iff line kept)
// numCovar      : set to the number of covariate names in io->nCovar
// covariate     : appended with *numCovar values per kept individual, in the
//                 same order as io->nCovar
//
// A column is selected by header name; if no header matches, the option is
// tried as a number N meaning the N-th column after the first three
// (reserved) columns.  A data line is kept only if it is not excluded and
// the phenotype and all covariates are present and not "NA" / missing code.
//
// Returns the number of individuals kept (> 0), or 0 on any fatal error
// (after printing a message).
int read_phenotype_file (io_spec* io, list<double>* phenotype, list<bool>* phenotypeokay,
			 int* numCovar, list<double>* covariate) {

  // copy the requested covariate names into a vector for indexed access
  vector<string> nCovar (io->nCovar->size());
  bool flagCovar;
  if (io->nCovar->empty()) {
    flagCovar = false;
    *numCovar = 0;
  } else {
    int ii = 0;
    for (list<string>::iterator i = io->nCovar->begin(); i != io->nCovar->end(); ++i) {
      nCovar[ii] = *i;
      ++ii;
    }
    *numCovar = ii;
    flagCovar = true;
  }


  // hash table of identifiers of individuals to exclude.  The table only
  // holds raw pointers, so the strings are owned by badPeopleStorage: list
  // elements never move, and everything is released on every return path.
  list<string> badPeopleStorage;
  hash_set<const char*, __gnu_cxx::hash<const char*>, strEq> badPeople;

  if (io->exclusions) {
    ifstream exclfile (io->exclusionsFilename.c_str());
    if (!exclfile) {
      // NOTE(review): the option name in this message is a placeholder;
      // replace with the real command line option once confirmed
      cerr << "!" << endl;
      cerr << "! fatal error : cannot open exclusion list file (--BLAH BLAH option)" << endl;
      cerr << "!" << endl;
      // 0 is the failure value on every other fatal path of this function
      return (0);
    }
    cout << "# reading exclusion list [ " << io->exclusionsFilename << " ]" << endl;
    string token;
    while (exclfile >> token) {
      if (badPeople.find(token.c_str()) == badPeople.end()) {
        badPeopleStorage.push_back(token);
        badPeople.insert(badPeopleStorage.back().c_str());
      }
    }
    exclfile.close();
    cout << "# read " << badPeople.size() << " individuals for exclusion" << endl;
  }

  cout << "# reading phenotype file [ " << io->phenotypeFilename << " ]" << endl;
  ifstream phenofile (io->phenotypeFilename.c_str());
  if (!phenofile) {
    cerr << "!" << endl;
    cerr << "! fatal error : cannot open phenotype file" << endl;
    cerr << "!" << endl;
    return (0);
  }

  string line;
  string token;

  list<string> fieldname;
  list<string> fieldtype;
  list<string>::iterator fieldnum;

  // 1-based token numbers of the columns to use; 0 means "not found yet"
  unsigned tokenForPheno = 0;
  vector<unsigned> tokenForCovar (*numCovar, 0);

  // read header line, matching column names against --npheno / --ncovar
  // (the first matching column wins)
  if ( getline(phenofile, line) ) {
    stringstream iss (line);
    while ( iss >> token ) {
      fieldname.push_back(token);
      if (tokenForPheno == 0 && token == io->nPheno) {
        tokenForPheno = (unsigned)fieldname.size();
      }
      if (flagCovar) {
        for (int i = 0; i < *numCovar; ++i) {
          if (tokenForCovar[i] == 0 && token == nCovar[i]) {
            tokenForCovar[i] = (unsigned)fieldname.size();
          }
        }
      }
    }
  } else {
    cerr << "!" << endl;
    cerr << "! fatal error : phenotype file did not have first header line" << endl;
    cerr << "!" << endl;
    return (0);
  }

  // a header with fewer than three columns has no data columns at all; such
  // headers are sent to the "not enough columns" error explicitly instead of
  // relying on unsigned wrap-around of fieldname.size()-3
  const bool shortHeader = fieldname.size() < 3;

  if (tokenForPheno >= 1 && tokenForPheno <= 3) {
    cout << "!" << endl;
    cout << "! warning : option : --npheno " << io->nPheno << endl;
    cout << "! matches a reserved column in the phenotype file" << endl;
    cout << "!" << endl;
    cout << "# analysing phenotype [ " << io->nPheno << " ] found in reserved column " << tokenForPheno << endl;

  } else if (tokenForPheno != 0) {
    cout << "# analysing phenotype [ " << io->nPheno << " ] found in data column " << tokenForPheno-3 << endl;

  } else {
    // could not find phenotype in column headers, let's see if it has a valid numeric value
    // assume --npheno N refers to number of column after first three

    stringstream iss (io->nPheno);
    if (iss >> tokenForPheno && tokenForPheno >= 1
        && (shortHeader || tokenForPheno <= fieldname.size()-3)) {

      cout << "# assuming option --npheno " << io->nPheno << " refers to a column number after first three reserved columns" << endl;
      tokenForPheno += 3;

      // walk to the header entry for this column, never stepping past end()
      fieldnum = fieldname.begin();
      for (unsigned count = 1; count < tokenForPheno && fieldnum != fieldname.end(); ++count) {
        ++fieldnum;
      }
      if (fieldnum == fieldname.end()) {
        cerr << "!" << endl;
        cerr << "! fatal error : option : --npheno " << io->nPheno << endl;
        cerr << "! not enough columns in phenotype file (in header)" << endl;
        cerr << "!" << endl;
        return (0);
      }
      // report the data column number (after the three reserved columns),
      // consistent with the other "data column" messages
      cout << "# analysing phenotype [ " << *fieldnum << " ] because in data column " << tokenForPheno-3 << endl;

    } else {
      cerr << "!" << endl;
      cerr << "! fatal error : option : --npheno " << io->nPheno << endl;
      cerr << "! does not match any column in phenotype file" << endl;
      cerr << "!" << endl;
      return (0);
    }
  }

  if (flagCovar) {
    for (int i = 0; i < *numCovar; ++i) {
      if (tokenForCovar[i] >= 1 && tokenForCovar[i] <= 3) {
        cout << "!" << endl;
        cout << "! warning : option : --ncovar " << nCovar[i] << endl;
        cout << "! matches a reserved column in the phenotype file" << endl;
        cout << "!" << endl;
        cout << "# using covariate [ " << nCovar[i] << " ] found in reserved column " << tokenForCovar[i] << endl;

      } else if (tokenForCovar[i] != 0) {
        cout << "# using covariate [ " << nCovar[i] << " ] found in data column " << tokenForCovar[i]-3 << endl;

      } else {
        // could not find covariate in column headers, let's see if it has a valid numeric value
        // assume --ncovar N refers to number of column after first three

        stringstream iss (nCovar[i]);
        unsigned covarColumn = 0;
        const bool parsed = (iss >> covarColumn) ? true : false;
        tokenForCovar[i] = covarColumn;
        if (parsed && covarColumn >= 1
            && (shortHeader || covarColumn <= fieldname.size()-3)) {

          cout << "# assuming option --ncovar " << nCovar[i] << " refers to a column number after first three reserved columns" << endl;
          tokenForCovar[i] += 3;

          // walk to the header entry for this column, never stepping past end()
          fieldnum = fieldname.begin();
          for (unsigned count = 1; count < tokenForCovar[i] && fieldnum != fieldname.end(); ++count) {
            ++fieldnum;
          }
          if (fieldnum == fieldname.end()) {
            cerr << "!" << endl;
            cerr << "! fatal error : option : --ncovar " << nCovar[i] << endl;
            cerr << "! not enough columns in phenotype file (in header)" << endl;
            cerr << "!" << endl;
            return (0);
          }
          cout << "# using covariate [ " << *fieldnum << " ] because in data column " << tokenForCovar[i]-3 << endl;

        } else {
          cerr << "!" << endl;
          cerr << "! fatal error : option : --ncovar " << nCovar[i] << endl;
          cerr << "! does not match any column in phenotype file" << endl;
          cerr << "!" << endl;
          return (0);
        }
      }
    }
  }

  // the phenotype and every covariate must come from distinct columns
  for (int i = 0; i < *numCovar; ++i) {
    if (tokenForPheno == tokenForCovar[i]) {
      cerr << "!" << endl;
      cerr << "! fatal error : options : --npheno " << io->nPheno << " --ncovar " << nCovar[i] << endl;
      cerr << "! both specify same column in phenotype file" << endl;
      cerr << "!" << endl;
      return (0);
    }
    for (int ii = i+1; ii < *numCovar; ++ii) {
      if (tokenForCovar[ii] == tokenForCovar[i]) {
        cerr << "!" << endl;
        cerr << "! fatal error : options : --ncovar " << nCovar[i] << " --ncovar " << nCovar[ii] << endl;
        cerr << "! both specify same column in phenotype file" << endl;
        cerr << "!" << endl;
        return (0);
      }
    }
  }


  if (io->specialOoopsLine) {
    // read second header line; only its field count is checked
    if ( getline(phenofile, line) ) {
      stringstream iss (line);
      while ( iss >> token ) {
        fieldtype.push_back(token);
      }
    } else {
      cerr << "!" << endl;
      cerr << "! fatal error : phenotype file did not have second header line" << endl;
      cerr << "!" << endl;
      return (0);
    }
    if (fieldname.size() != fieldtype.size()) {
      cerr << "!" << endl;
      cerr << "! fatal error : phenotype file field number mismatch" << endl;
      cerr << "!             : phenotype file 1st line had " << fieldname.size() << " fields" << endl;
      cerr << "!             : phenotype file 2nd line had " << fieldtype.size() << " fields" << endl;
      cerr << "!" << endl;
      return (0);
    }
  }

  bool suspectOoopsLine = false;
  int numBadPeople = 0;
  int phenoNotNA = 0;
  int covarNotNA = 0;
  vector<bool> okaycovar (*numCovar);
  vector<double> thiscovar (*numCovar);

  // one individual per remaining line
  while ( getline(phenofile, line) ) {
    stringstream iss (line);

    bool okayNotBad = true;
    bool okaypheno = false;
    double thispheno = -9999.; // hopefully any bug will show up in the ranges
    for (int i = 0; i < *numCovar; ++i) {
      okaycovar[i] = false;
      thiscovar[i] = -9999.; // hopefully any bug will show up in the ranges
    }

    unsigned ntoken = 0;

    // scan every token of the line; a wanted column whose token is NA, equals
    // the missing code, or is absent because the line is too short, is
    // considered missing
    while ( iss >> token ) {
      ++ntoken;

      // the first two tokens are matched against the exclusion list
      if (io->exclusions && (ntoken == 1 || ntoken == 2)
          && badPeople.find(token.c_str()) != badPeople.end()) {
        okayNotBad = false;
      }

      if (okayNotBad) {
        const bool isMissing = (token == "NA")
          || (io->specialMissingCode && token == io->missingCode);
        if (!isMissing) {
          if (ntoken == tokenForPheno) {
            // a literal P here looks like the SNPTEST column-type line
            if (token == "P") suspectOoopsLine = true;
            thispheno = strtod(token.c_str(),0);
            okaypheno = true;
            ++phenoNotNA;
          }
          for (int i = 0; i < *numCovar; ++i) {
            if (ntoken == tokenForCovar[i]) {
              if (token == "P") suspectOoopsLine = true;
              thiscovar[i] = strtod(token.c_str(),0);
              okaycovar[i] = true;
            }
          }
        }
      }
    }

    if (!okayNotBad) {
      phenotypeokay->push_back(false);
      ++numBadPeople;
      continue;
    }

    // trivially true when no covariates were requested
    bool okayallcovar = true;
    for (int i = 0; i < *numCovar; ++i) {
      okayallcovar &= okaycovar[i];
    }
    if (okayallcovar) ++covarNotNA;

    if (okaypheno && okayallcovar) {
      phenotype->push_back(thispheno);
      for (int i = 0; i < *numCovar; ++i) {
        covariate->push_back(thiscovar[i]);
      }
      phenotypeokay->push_back(true);
    } else {
      phenotypeokay->push_back(false);
    }
  }
  phenofile.close();

  cout << "# read " << phenotypeokay->size() << " data lines from phenotype file" << endl;
  if (io->exclusions) {
    cout << "# " << numBadPeople << " lines excluded because of matches in [ " << io->exclusionsFilename << " ]" << endl;
  }
  cout << "# found " << phenoNotNA << " non-missing phenotypes" << endl;
  if (flagCovar) {
    cout << "# found " << covarNotNA << " non-missing covariates" << endl;
    // sanity check: exactly *numCovar covariate values per kept phenotype
    if (phenotype->size()*(*numCovar) != covariate->size()) {
      cerr << "!" << endl;
      cerr << "! internal error: phenotype/interaction covariate miscount" << endl;
      cerr << "!" << endl;
      return(0);
    }
  }
  cout << "# will analyse data for " << phenotype->size() << " individuals" << endl;

  if (suspectOoopsLine) {
    cout << "!" << endl;
    cout << "! warning : found `P' when expecting phenotype value" << endl;
    cout << "!         : suspect SNPTEST format file, try --ooops-line option" << endl;
    cout << "!" << endl;
  }

  int n = (int)phenotype->size();
  if (n==0) {
    cerr << "!" << endl;
    cerr << "! fatal error: zero non-missing phenotypes" << endl;
    cerr << "!" << endl;
    return (0);
  }

  return (n);
}

// Parse starting values ("hints") for a bigK component mixture from the file
// named in io->mixhintFilename.
//
// The file is read as whitespace-separated tokens.  A token beginning with
// pi, mu or sigma, followed by a component number k (e.g. pi1, mu2, sigma3),
// names a parameter; the next token that is not "=" or "<-" is taken as its
// value.  Values that do not parse as a number, component numbers outside
// 1..bigK, and non-positive values of pi or sigma are ignored.
//
// io    : options; only mixhintFilename is read here
// bigK  : number of mixture components
// pi, mu, sigma : element k-1 receives the value for component k
//                 (assumes each vector holds at least bigK elements -- TODO
//                 confirm against caller)
//
// Returns 1 if pi, mu and sigma were all set for every component 1..bigK,
// otherwise prints which ones are missing and returns 0.
int read_mix_hint (io_spec* io, int bigK, vector<double>* pi, vector<double>* mu, vector<double>* sigma) {

  cout << "# looking for hints for [ " << bigK << " ] component mixture in [ " << io->mixhintFilename << " ]" << endl;

  // one bit per component is tracked in a long unsigned mask, and the final
  // check needs (one << bigK), so bigK must be smaller than the mask width
  const int maxK = (int)(8*sizeof(long unsigned int)) - 1;
  if (bigK < 0 || bigK > maxK) {
    cerr << "!" << endl;
    cerr << "! error: mixture hints can only be read for between 0 and " << maxK << " components" << endl;
    cerr << "!" << endl;
    return (0);
  }

  ifstream mixfile (io->mixhintFilename.c_str());
  if (!mixfile) {
    cerr << "!" << endl;
    cerr << "! fatal error : cannot open mixture hint file" << endl;
    cerr << "!" << endl;
    return (0);
  }

  // bit k-1 is set once a valid value for component k has been read
  long unsigned int piStat = 0;
  long unsigned int muStat = 0;
  long unsigned int sigmaStat = 0;
  // shifts must be done in the mask's own type, not in int
  const long unsigned int one = 1;

  string line;
  string token;
  char mode = 'w'; // 'w' waiting for a name; 'p', 'm', 's' waiting for a value
  string kVal = "";

  while (getline(mixfile, line)) {
    stringstream liness (line);
    while (liness >> token) {
      switch (mode) {
      case 'w': // waiting for variable name
        if (token.substr(0,2) == "pi") {
          mode = 'p';
          kVal = token.substr(2);
        }
        if (token.substr(0,2) == "mu") {
          mode = 'm';
          kVal = token.substr(2);
        }
        if (token.substr(0,5) == "sigma") {
          mode = 's';
          kVal = token.substr(5);
        }
        break;
      case 'p':
      case 'm':
      case 's': // value expected, defer if equals sign
        if (token == "=" || token == "<-") {
          // do nothing
        } else {
          stringstream valss (token);
          double val;
          if (valss >> val) {
            int k = (int)atol(kVal.c_str());
            if (k > 0 && k <= bigK) {
              const long unsigned int bit = one << (k-1);
              switch (mode) {
              case 'p':
                if (val > 0) {
                  (*pi)[k-1] = val;
                  piStat |= bit;
                }
                break;
              case 'm':
                (*mu)[k-1] = val;
                muStat |= bit;
                break;
              case 's':
                if (val > 0) {
                  (*sigma)[k-1] = val;
                  sigmaStat |= bit;
                }
                break;
              }
            }
          }
          // whether or not the value was usable, go back to waiting for a name
          mode = 'w';
        }
        break;
      }
    }
  }
  mixfile.close();

  // all of bits 0..bigK-1 set means every component was given
  const long unsigned int allSet = (one<<bigK)-1;
  if (piStat == allSet && muStat == allSet && sigmaStat == allSet) {
    cout << "# mixture hints parsed successfully" << endl;
    for (int k = 0; k < bigK; ++k) {
      cout << "#   pi" << k+1 << " = " << (*pi)[k];
      cout << " mu" << k+1 << " = " << (*mu)[k];
      cout << " sigma" << k+1 << " = " << (*sigma)[k] << endl;
    }
    return (1);
  } else {
    cerr << "!" << endl;
    cerr << "! error: mixture hint parsing failed for:";
    // walk the masks from the lowest bit, naming each parameter not set
    for (int k = 1; k <= bigK; ++k) {
      if (! (piStat & 1)) { cerr << " pi" << k; }
      if (! (muStat & 1)) { cerr << " mu" << k; }
      if (! (sigmaStat & 1)) { cerr << " sigma" << k; }
      piStat >>= 1;
      muStat >>= 1;
      sigmaStat >>= 1;
    }
    cerr << endl << "!" << endl;
  }
  return(0);
}


