#include <iostream>
#include <fstream>
#include <iomanip>
#include "files.h"
#include "control.h"
#include "warning.h"
#include "options.h"
#include "trait.h"
#include "model.h"
#include "linkagedist.h"
#include "linkagemodel.h"
#include "parmodel.h"
#include "simulation.h"
#include "genotypes.h"
#include "family.h"

// Read a numeric variable of type T from dat. If read fails,
// msg is printed and a fatal error occurs
template<class T>
void readnumeric(T &target, ISTRINGSTREAM *dat, const string &msg) {
  target = T(-1);
  *dat >> target >> ws;
  assertcond(dat->good() || dat->eof() && target != T(-1), msg);
}

// Fetch the next line of dat and expose it through line as a newly allocated
// ISTRINGSTREAM.
//
//   dat     - input file to read from
//   curline - line counter; incremented before reading, so on return it holds
//             the number of the line that line refers to
//   line    - in/out: the stream left by the previous call (or 0) is deleted
//             and replaced; the caller owns the last stream handed out
//
// If dat is already at end of file a message is printed and a fatal error
// occurs (via assertcond).  The text is read into one static buffer shared by
// all calls, so the function is not reentrant.
// NOTE(review): assuming Infile::getline behaves like std::istream::getline,
// lines of MAXLINE characters or more are not read completely -- confirm.
void getnextline(Infile &dat, Uint &curline, ISTRINGSTREAM *&line) {
  const Uint MAXLINE = 100000;
  static char linebuffer[MAXLINE];
  curline++;
  // curline already is the number of the line about to be read, so it is
  // reported as it stands
  assertcond(!dat.eof(), string("Datafile input ") + curline +
             ": Unable to read line");
  dat.getline(linebuffer, MAXLINE);
  delete line;
  line = new ISTRINGSTREAM(linebuffer);
}

// Look for marker number i among the first totuse entries of usemarkers.
// Yields the position of the first entry equal to i, or -1 if there is none.
int Control::markerused(Uint i, const UintVec usemarkers, Uint totuse) {
  int found = -1;
  Uint pos = 0;
  while (pos < totuse && found == -1) {
    if (usemarkers[pos] == i) found = pos;
    ++pos;
  }
  return found;
}

// Trait model shared by the readers below: getdat allocates it while reading
// the first datafile (popidx == 0), and both getdat and getpre consult its
// liability class count afterwards.
Trait *trait;

// Read a Linkage style locus description file (*.dat).
// Assumes the SAME format as given to GeneHunter (i.e. dat-file
// with marker name included).
//
//   dat        - datafile; opened at the start and closed at the end
//   usemarkers - out (first datafile only): the markers selected on line 3,
//                numbered from 1 among the markers alone (disease locus not
//                counted); allocated here with NEWVEC, released by the caller
//   totuse     - out (first datafile only): number of entries in usemarkers
//   totnum     - out (first datafile only): number of markers in the file,
//                i.e. the locus count of line 1 minus the disease locus
//   popidx     - index of the datafile being read
//
// For popidx == 0 marker names, allele frequencies and distances go into
// 'map', disease gene frequency and penetrances go into 'trait', and the
// sex-linked / sex-specific flags go into 'options'.  For any other popidx the
// lines are still consumed, but only the allele frequencies of the selected
// markers are stored (as frequencies of population popidx).
// Any line that cannot be parsed is a fatal error (via assertcond).
void Control::getdat(Infile& dat, UintVec &usemarkers, Uint &totuse,
                     Uint &totnum, Uint popidx) {
  dat.open();
  Uint ignoreUint;
  string ignorestring;
#define DATERROR string("Input datafile ") + curline + ": "
  ISTRINGSTREAM *inputline = 0;
  Uint curline = 0;

  // line 1: number of loci, <ignored>, sex-linked flag, <ignored>
  getnextline(dat, curline, inputline);
  int sexlinked = 0;
  if (popidx == 0) { // Only read for first datfile
    readnumeric(totnum, inputline, DATERROR +
                "Unable to to read number of markers");
    readnumeric(ignoreUint, inputline, DATERROR + "ignore 1");
    readnumeric(sexlinked, inputline, DATERROR +
                "Unable to read sexlinked flag");
    readnumeric(ignoreUint, inputline, DATERROR + "ignore 2");
    // totnum is unsigned: a locus count of 0 has to be rejected before the
    // decrement, which would otherwise wrap around to a huge marker count
    assertcond(totnum > 0, DATERROR + "invalid number of markers (" +
               totnum + ")");
    --totnum; // total number of markers in datafile
    assertcond(totnum > 0, DATERROR + "invalid number of markers (" +
               totnum + ")");
    assertcond(sexlinked == 0 || sexlinked == 1, DATERROR +
               "sex-linked flag must be either 0 or 1");
    options->sexlinked = sexlinked == 1;
  }

  // line 2: nothing
  getnextline(dat, curline, inputline);

  // line 3: which markers are used in the map (the number of markers is
  // unknown until the line has been read)
  Uint unnamedmarkeridx = 1;
  // number of markers listed before the disease locus on line 3,
  // or -1 if the disease locus is not listed
  int linkagelocusidx = -1;
  getnextline(dat, curline, inputline);
  if (popidx == 0) { // Only read for first datfile
    totuse = 0;
    NEWVEC(Uint, usemarkers, totnum);
    while (!inputline->eof()) {
      Uint marker = 0;
      readnumeric(marker, inputline, DATERROR +
                  "Unable to read which markers to use");
      assertcond(marker != 0, DATERROR +
                 "Do not include disease locus (locus 1) in list of markers " +
                 "to analyse");
      if (marker == 1)
        linkagelocusidx = totuse;
      else {
        // usemarkers holds totnum entries, so there must be a free slot left
        // before anything is stored.  The disease locus takes no slot, which
        // is why the test sits in this branch only.
        assertcond(totuse < totnum, DATERROR +
                   "More markers to use than specified in line 1");
        usemarkers[totuse] = marker;
        assertcond(usemarkers[totuse] <= totnum + 1,
                   DATERROR + "Illegal marker number " + usemarkers[totuse]);
        usemarkers[totuse]--; // locus number -> marker number
        totuse++;
      }
    }
    map.init(totuse);
  }

  // 4, 5:  gene frequencies
  Float df;
  getnextline(dat, curline, inputline);
  getnextline(dat, curline, inputline);
  if (popidx == 0) { // Only read for first datfile
    readnumeric(df, inputline, DATERROR + "Unable to read disease gene freq");
    assertcond(df >= 0.0 && df <= 1.0, DATERROR +
               "Invalid prevelance (disease gene freq. = " + df + ")");
  }

  // line 6: number of liability classes, followed by the penetrances
  getnextline(dat, curline, inputline);
  if (popidx == 0) { // Only read for first datfile
    Uint nliab;
    readnumeric(nliab, inputline, DATERROR +
                "Unable to read number of liability classes");
    trait = new Trait(nliab*(options->sexlinked ? 2 : 1), df, sexlinked);

    // penetrance: one line of three values per liability class; for
    // sex-linked data a second line of two values follows each of them.
    // NOTE(review): the rows are interleaved here (2*i and 2*i + 1 for
    // class i), whereas getpre places a person with sex == 1 in row
    // class + nliability/2 - 1.  The two agree only when there is a single
    // liability class -- confirm which layout Trait expects.
    for (Uint i = 0; i < nliab; i++) {
      Uint i1 = options->sexlinked ? 2*i : i;
      getnextline(dat, curline, inputline);
      readnumeric(trait->penetrance[i1][0], inputline, DATERROR +
                  "Unable to read penetrance number " + i);
      readnumeric(trait->penetrance[i1][1], inputline, DATERROR +
                  "Unable to read penetrance number " + i);
      readnumeric(trait->penetrance[i1][2], inputline, DATERROR +
                  "Unable to read penetrance number " + i);
      assertcond(trait->penetrance[i1][0] >= 0 &&
                 trait->penetrance[i1][1] >= 0 &&
                 trait->penetrance[i1][2] >= 0,
                 DATERROR + "Invalid penetrance");
      if (options->sexlinked) {
        Uint i2 = i1 + 1;
        getnextline(dat, curline, inputline);
        readnumeric(trait->penetrance[i2][0], inputline, DATERROR +
                    "Unable to read penetrance number " + i);
        readnumeric(trait->penetrance[i2][1], inputline, DATERROR +
                    "Unable to read penetrance number " + i);
        assertcond(trait->penetrance[i2][0] >= 0 &&
                   trait->penetrance[i2][1] >= 0,
                   DATERROR + "Invalid penetrance");
      }
    }
  }
  else {
    // skip the penetrance lines
    for (Uint i = 0; i < trait->nliability; i++)
      getnextline(dat, curline, inputline);
  }

  // input marker frequencies and names: two lines per marker
  // NOTE(review): alfreq is allocated once per selected marker and never
  // released in this function; whether map.addmarker / map.addpopulationfreq
  // keep the pointer cannot be told from here.
  FloatVec alfreq;
  for (Uint i = 1; i <= totnum; i++) {
    // e.g.: 3  4 # D22S420
    getnextline(dat, curline, inputline);
    int midx = markerused(i, usemarkers, totuse);
    if (midx != -1) { // use this marker
      Uint markertype;
      Uint allelecount;
      readnumeric(markertype, inputline, DATERROR +
                  "Unable to read marker type");
      assertcond(markertype == 3, DATERROR + "Unknown marker type " +
                 markertype);
      readnumeric(allelecount, inputline, DATERROR +
                  "Unable to read number of alleles");
      string markername;
      // the first token after the numbers (the '#' in the example) is dropped
      *inputline >> ignorestring >> ws >> markername;
      if (markername == "") {
        markername = string("M") + unnamedmarkeridx;
        unnamedmarkeridx++;
      }
      NEWVEC(Float, alfreq, allelecount);
      getnextline(dat, curline, inputline);
      for (Uint k = 0; k < allelecount; k++) {
        // e.g.: 0.1 0.5 0.4
        readnumeric(alfreq[k], inputline, DATERROR +
                    "Unable to read allele frequency for marker " + markername);
        assertcond(alfreq[k] >= 0.0 && alfreq[k] <= 1.0, DATERROR +
                   "Invalid allele frequency for marker " + markername);
      }
      if (popidx == 0)
        map.addmarker(midx, allelecount, alfreq, markername);
      else
        map.addpopulationfreq(midx, popidx, allelecount, alfreq);
    }
    else {
      // Ignore allele frequencies
      getnextline(dat, curline, inputline);
    }
  }

  if (popidx == 0) { // Only read for first datfile
    // end line - 2: recomb rates sex specific ? (two numbers: 0  0 )
    getnextline(dat, curline, inputline);
    Uint sexspec;
    readnumeric(sexspec, inputline, DATERROR +
                "Unable to read sex-difference type");
    if (sexspec > 0) options->sexspecific = true;

    // end line - 1: recombination (or cM), one line per sex if sex specific
    FloatVec dist;
    NEWVEC(Float, dist, totuse);

    Uint nsex = options->sexspecific ? 2 : 1;

    for (Uint jsex = 1; jsex <= nsex; jsex++) {
      getnextline(dat, curline, inputline);
      if (linkagelocusidx == 0) {
        // disease locus listed first: the leading distance is not part of
        // the marker map and is read only to get past it
        Float curdist = -1.0;
        readnumeric(curdist, inputline, DATERROR +
                    "Unable to read distance from disease locus to first marker");
      }
      // "j + 1 < totuse" rather than "j < totuse - 1": totuse is unsigned and
      // the subtraction would wrap around for totuse == 0
      for (Uint j = 0; j + 1 < totuse; j++) {
        // If the disease locus was listed between marker j and marker j + 1,
        // the line gives this interval in two parts (marker - disease locus,
        // disease locus - marker) which are merged into one marker distance.
        // A disease locus listed after the last marker needs no handling: its
        // distance is the last one on the line and is never read.
        const Uint nparts =
          (linkagelocusidx > 0 && j + 1 == Uint(linkagelocusidx)) ? 2 : 1;
        Float total = 0.0;
        for (Uint part = 0; part < nparts; part++) {
          Float curdist = -1.0;
          readnumeric(curdist, inputline, DATERROR +
                      "Unable to read intermarker distance number " + (j + 1));
          assertcond(curdist >= 0, DATERROR +
                     "Negative distance between markers: " +
                     Floattostring(curdist, 4));
          // map distances add up; otherwise the two values are combined as
          // curdist + total - 2*curdist*total
          if (options->unit == CENTIMORGAN || options->unit == DEFAULT)
            total = curdist + total;
          else
            total = curdist + total - 2*curdist*total;
        }
        dist[j] = total;
        if (dist[j] == 0)
          warning("Distance between two markers set to 0 in datfile!");
        if (dist[j] > 1000) {
          warning("Distance between two markers greater than 1000cM in datfile, set to 1000cM!");
          dist[j] = 1000;
        }
      }
      map.adddist(dist, nsex == 1 ? 0 : jsex);
    }
    DELETEVEC(dist);

    // end line: (e.g. 1 0.10000 0.450000)
    getnextline(dat, curline, inputline);

    Parmodel::setdefaulttrait(trait);
    if (options->simulation != 0) options->simulation->trait = trait;
  }
  // getnextline leaves the last line stream to its caller
  delete inputline;
  dat.close();
}

// Read a Linkage style pedigree file (*.pre) and append its families to the
// list starting at firstorig.
//
//   pre        - pedigree file; opened at the start and closed at the end
//   usemarkers - the selected markers, as filled in by getdat
//   totuse     - number of entries in usemarkers
//   totnum     - number of markers present on every pedigree line
//   popidx     - index of the pre-file being read; handed to each new Family
//
// Every line describes one person: family id, person id, father id, mother
// id, sex, disease status (absent if an affection file is assigned) and, if a
// datafile was given, an optional liability class followed by one allele pair
// per marker.  Consecutive lines carrying the same family id make up one
// family.  Once a family is complete the genotyped flag of its members is
// set and Family::organize is called with the collected parent ids.
// Unparsable lines are fatal errors (via assertcond).
void Control::getpre(Infile& pre, const UintVec usemarkers, 
                     const Uint totuse, const Uint totnum, Uint popidx) {
  string fid, pid;
  Stringvector faid;
  Stringvector moid;

  // ds is read from the file only when no affection file is assigned; it
  // starts at 0 so that Diseasestatus(ds) below never converts an
  // indeterminate value.
  // NOTE(review): 0 is presumed to be the "unknown" status -- confirm.
  int sex, ds = 0, liab = 0;
  IntVec a1 = 0;
  IntVec a2 = 0;
  if (totnum > 0) {
    NEWVEC(Allele, a1, totnum);
    NEWVEC(Allele, a2, totnum);
  }
#define PREERROR string("Prefile input ") + curline + ": "

  pre.open();
  Uint curline = 0;
  // new families are appended after the current end of the list
  Family *fam = firstorig;
  while (fam != 0 && fam->next != 0) fam = fam->next;
  pre >> fid;
  bool firstfamily = true;
  // One line stream serves the whole file: getnextline deletes the previous
  // stream whenever it hands out a new one.
  ISTRINGSTREAM *inputline = 0;

  while (true) {
    bool familychange = pre.eof() || firstfamily || fam->id != fid;
    if (familychange) {
      if (!firstfamily) {
        // the family read so far is complete: set genotyped flag
        if (options->chromolist == 0) {
          for (Person *p = fam->first; p != 0; p = p->next) {
            if (options->simulation == 0)
              for (Uint gam = 0; gam < map.num && !p->genotyped; gam++)
                p->genotyped |= p->genotyp[gam];
            else p->genotyped = p->genotyp[0] != ALLELEUNKNOWN;
          }
        }
        else {
          for (Person *p = fam->first; p != 0; p = p->next) {
            PN *q = string2pn.get(p->id.c_str());
            if (q != 0) {
              p->genotyped = q->genotyped();
              if (options->affectionfile.assigned())
                p->origdstat = p->dstat = q->dstat;
            }
            else {
              p->genotyped = false;
              if (options->affectionfile.assigned())
                p->origdstat = p->dstat = UNKNOWN;
            }
            assertcond(p->dstat <= 2, string("Illegal affection status of ") +
                       p->dstat + " for person " + p->id);
          }
        }
        fam->organize(faid, moid);
        faid.clear();
        moid.clear();
      }
      firstfamily = false;
      if (pre.eof()) break;
      // start of a new family ...
      if (fam == 0) {
        fam = new Family(fid, popidx);
        firstorig = fam;
      }
      else {
        fam->next = new Family(fid, popidx);
        fam = fam->next;
      }
    }
    // the family id has been consumed already; the rest of the line follows
    getnextline(pre, curline, inputline);   // e.g.: Fam1 PN1 Fa1 Mo1
    string fathersid, mothersid;
    *inputline >> pid >> fathersid >> mothersid;
    faid.push_back(fathersid);
    moid.push_back(mothersid);
    readnumeric(sex, inputline, PREERROR + "Unable to read sex");
    if (!options->affectionfile.assigned())
      readnumeric(ds, inputline, PREERROR + "Unable to read disease status");
    if (options->datfile.size() > 0) {
      // A liability class column is present only if the datafile defined
      // more than one class.
      // NOTE(review): for sex-linked data a person with sex == 1 is placed
      // in row class + nliability/2 - 1, whereas getdat stores the
      // penetrance rows interleaved (2*i, 2*i + 1) -- the two agree only for
      // a single liability class; confirm which layout Trait expects.
      if (trait->nliability > (options->sexlinked ? 2 : 1)) {
        readnumeric(liab, inputline, PREERROR +
                    "Unable to read liability class");
        if (options->sexlinked && sex == 1)
          liab += trait->nliability/2;
        liab--;
      } else 
        liab = options->sexlinked ? 2 - sex : 0;

      // allele pairs; when simulating only the first marker is read
      for (Uint i = 1; i <= totnum && (i < 2 || options->simulation == 0);
           i++) {
        int midx = markerused(i, usemarkers, totuse);
        if (midx == -1) {
          // marker not selected: parse and drop its alleles
          Uint dumb;
          readnumeric(dumb, inputline, PREERROR + 
                      "Unable to read first allele for marker number " + i);
          readnumeric(dumb, inputline, PREERROR + 
                      "Unable to read second allele for marker number " + i);
        }
        else {
          readnumeric(a1[midx], inputline, PREERROR + 
                      "Unable to read first allele for marker number " + i);
          readnumeric(a2[midx], inputline, PREERROR + 
                      "Unable to read second allele for marker number " + i);
          assertcond(a1[midx] >= 0 && a1[midx] <= map.numallele[midx],
                     string("Illegal allele value ") + a1[midx] +
                     " for marker number " + i);
          assertcond(a2[midx] >= 0 && a2[midx] <= map.numallele[midx],
                     string("Illegal allele value ") + a2[midx] +
                     " for marker number " + i);
        }
      }
    } else
      liab = options->sexlinked ? 2 - sex : 0;

    assertcond(liab >= 0 && liab < 10000, string("Invalid liability class ")
               + liab + " for person " + pid + " in family " + fid);
    Double trval = NOTRAITVALUE;
    if (options->traitvaluefile.assigned()) {
      String2Double::iterator it = string2traitvalue.find(pid);
      if (it != string2traitvalue.end())
        trval = it->second;
    }
    Person *p = fam->addperson(pid, Sex(sex-1), Diseasestatus(ds), liab, trval);
    if (options->chromolist == 0) p->setgenotypes(a1, a2, totuse);
    pre >> fid;
  }

  delete inputline;
  // a1 and a2 are scratch buffers refilled for every person, so nothing may
  // refer to them once the file has been read
  if (totnum > 0) {
    DELETEVEC(a1);
    DELETEVEC(a2);
  }
  pre.close();
}

// Read every datafile / pre-file pair named in options, one pair per
// population index.  The marker selection (usemarkers, totuse, totnum) is
// filled in by getdat for the first datafile and reused for all pre-files.
// It is a fatal error if no family has been read once all files are done.
void Control::inputld() {
  // Start from defined values: with no pre-file at all the loop body never
  // runs, and the clean-up below must not see an indeterminate pointer.
  UintVec usemarkers = 0;
  Uint totnum = 0, totuse = 0;

  for (Uint popidx = 0; popidx < options->prefile.size(); popidx++) {
    Infile datfile, prefile;
    datfile.setname(options->datfile[popidx]);
    getdat(datfile, usemarkers, totuse, totnum, popidx);
    prefile.setname(options->prefile[popidx]);
    getpre(prefile, usemarkers, totuse, totnum, popidx);
  }

  assertcond(firstorig != 0, "\nNo informative families to analyse!");
  if (usemarkers != 0) {
    DELETEVEC(usemarkers);
  }
}
