#ifndef _OPTIONS
#define _OPTIONS

/*
% This file shows all possible keywords for the option file. It is in fact a 
% syntactically legal option file, but to obtain a functional option file, one
% of the two input styles should be chosen, and to obtain a realistic file,
% some (sometimes conflicting) options should be removed or commented out

% Note that comments are preceded by % and that both keywords and values are 
% case insensitive except for file names. All output file names are optional 
% and the file names shown here are in fact the default ones. Input files, how-
% ever, have no default names. Allegro accepts input in either of two styles:
% `Linkage' style (same as used by Genehunter and the well known program 
% `Linkage', see [1]), and `new' format. Note that with Linkage style input, 
% each run will only analyse one chromosome, but with the new style, several 
% chromosomes may be handled in one run


% Linkage style input:
% --------------------
DATFILE alle.dat    % `Linkage' style marker data file (`datfile')
PREFILE alle.pre    % `Linkage' style `prefile' with pedigree and genotype data
UNIT centimorgan    % Unit for datfile, alternative is 'recombination'

% The format of these files is described in the Allegro User Manual, and in [1] 
% (with SEXSPECIFIC on the datfile has an additional line with intermarker 
% distances). The default is to assume that the unit is 'recombination' if all 
% values in the datfile are < 0.5, otherwise cM


% New input format (the comments list the columns in the files):
% --------------------------------------------------------------
PREFILE dxx.pre         % Family, individual, father, mother, sex, aff.status
GENOTYPEFILE dxx.gen    % Individual, marker, allele1, allele2
MAPFILE dxx.map         % Chromosome, marker, loc[-male, loc-female]
CHROMOSOMEFILE chr.dat  % Chromosome name, ctype (autosomal/sex), clength
FREQFILE dxx.freq       % Marker, allele, frequency
AFFECTIONFILE dxx.aff   % Individual, affection status (this file is optional)

CHROMOSOMES C01 C02 C03 % Analyse chromosomes 1, 2 and 3
CHROMOSOMES all         % Analyse all chromosomes
CONTROLPRIOR 2          % Add 2 (the default) to all frequencies in FREQFILE

ADDENDMARKERS off       % Turns off the default behaviour of adding a marker
                        % at the beginning and end of each chromosome

% The prefile here corresponds to first 6 columns of a Linkage style prefile
% If `AFFECTIONFILE' (with coded affection status) is given the sixth column in
% the prefile is overwritten (or the prefile need only have five columns).
% If a marker is missing from FREQFILE the frequencies in the data are used.
% If `CONTROLPRIOR' is omitted, frequencies are flattened by adding 2 to each
% If `SEXSPECIFIC' is on the MAP file has four columns


% Specification of analyses to be carried out
% -------------------------------------------
MODEL mpt par het param.mpt fparam.mpt 
   % Classical parametric multipoint analysis. Keyword `het' is included to
   % output heterogeneity parameters (may be omitted). Disease allele frequen-
   % cies and penetrances are read from the datfile. 'MODEL spt ...' may also
   % be used. This implies single point (two point) analysis; a LOD-score
   % corresponding to the disease gene being at the marker is calculated based
   % only on the genotype data at the marker (ignoring data at other markers)
MODEL mpt par freq:0.02 pen:1.0/0.9/0.01 het param.mpt
   % Use 0.02 as disease allele frequency and penetrances of 0.01 for dd
   % (no disease allele), 0.9 for dD and 1.0 for DD
MODEL mpt par X freq:0.02 pen:1.0/1.0/0.01/1.0/0.01 het param.mpt
   % Same thing for sexlinked chromosomes, the first three penetrances are for
   % female, the second two for male

MODEL mpt exp pairs equal     exppairs.mpt         fexppairs.mpt
MODEL mpt lin all   equal     linall.mpt           flinall.mpt
MODEL spt lin all   f3wts     linall.f3wts.spt     flinall.f3wts.spt
MODEL spt lin homoz equal     linhomoz.spt         flinhomoz.spt
MODEL mpt exp pairs power:0.5 exppairs.0.5.mpt     fexppairs.0.5.mpt
MODEL mpt exp pairs power:1   exppairs.1.mpt       fexppairs.1.mpt
MODEL mpt exp pairs f3wts     exppairs.f3wts.mpt   fexppairs.f3wts.mpt
MODEL mpt exp pairs hodge     exppairs.hodge.mpt   fexppairs.hodge.mpt
   % All these options specify allele sharing analysis, multipoint or single
   % point, using either the linear or the exponential model of [2]. The third
   % argument on the line (pairs, all, etc.) is the name of the scoring
   % function used, alternatives to the ones shown are `mnallele' and `robdom',
   % see [3] and [4]. The fourth argument specifies family weights, `equal'
   % is used for equal weighting of families, `power:0.5' and `power:1' set the
   % s of section 5.1 in [4] to 0.5 and 1 respectively, and `f3wts' specifies
   % a file to read the weights from. The next two arguments are output files,
   % the first one is for `total' output and the second one for `per-family' 
   % output. The default names of these files are derived from the other
   % arguments as indicated by the examples. The output files all contain both
   % allele sharing LOD scores and NPL scores

MODEL mpt exp genpairs:gw.dat equal expgenpairs.gw.dat.mpt
   % A generalization of the pairs scoring function, defined as:
   %
   %   S = \sum_{ij} w_{ij} S_{ij},
   %
   % where the sum is taken over all pairs of relatives i and j, S_{ij} is the
   % number of alleles i and j share IBD, and w_{ij} is the weight given to the
   % pair i, j. The format of `gw.dat' is:
   %
   % person1 person2 weight
   
% Various control options:
% ------------------------
STEPS 5                 % Evaluate LOD score at 4 intermarker loci
MAXSTEPLENGTH 0.3       % Alternative to STEPS (unit is cM). With SEXSPECIFIC
                        % on, sets maximum sex-averaged step length.
STEPFILE steps.dat      % Alternative to STEPS; read locations from `steps.dat'
                        % (with SEXSPECIFIC on, these are sex-averaged locns)
                        % The default is to use `STEPS 2'
MAXMEMORY 320           % Set maximum amount of CPU memory used to 320 Mb
NPLEXACTP on            % Calculate exact p-value for NPL score
ENTROPY on              % Calculate entropy (Genehunter information)
SEXSPECIFIC on          % Use sex specific recombination fraction
MAXUNLOOP 3             % Level of loop unrolling in FFT (see [4], sec. 3.1)
                        % (default is 3, Pentium optimum 2, Sun 3, Dec Alpha 4)
SWAPDIRNAME /tmp/swap   % Place to swap to when saving on memory
SEED 1319131            % `Seeds' pseudo random number generators
CROSSOVERRATE xover.out fxover.out % Estimate and print out crossoverrate 
                                   % between markers
UNINFORMATIVE uninformative.out    % Print list of uninformative markers

% Pairwise ibd calculation:
% -------------------------
PAIRWISEIBD mpt all prior.mpt posterior.mpt
PAIRWISEIBD spt all prior.spt posterior.spt
   % Calculate pairwise ibd sharing between all pairs of people. The `all'
   % option can be substituted with `genotyped', `affected', `qtl' and
   % `informative'

% Haplotyping and simulation:
% ---------------------------
HAPLOTYPE haplo.out ihaplo.out founder.out inher.out
   % Reconstruct haplotypes. All the arguments are output file names (and
   % thus optional). Haplo.out, contains reconstructed haplotypes for genotyped
   % individuals only, ihaplo.out contains in addition imputed haplotypes for
   % other individuals, inher.out lists the most probable inheritance vector
   % path and founder.out lists the same information in a different format

SIMULATE dloc:50.53 npre:100 rep:50 het:0.70 interf:4.3/4.3 perfectdata
SIMULATE dloc:50.53 npre:100 rep:50 err:0.01 interf:1/1 yield:0.9 het:0.70
   % Simulate genotypes of families with the same family structure as in
   % the input prefile, at the markers given in the input datfile, given the
   % phenotypes (affection status) of the prefile and the parametric disease
   % model of the datfile. The disease locus is set at 50.53 cM, each simulated
   % genotype has a 1% probability of being wrong (random, distributed as
   % specified in the datfile), the probability of a simulated genotype being
   % put in to the prefile is 90% (otherwise a 0 is put in its place), and each
   % family has a 70% chance of being completely unconnected to the disease
   % locus (heterogeneity). The simulation is repeated 100 x 50 times, and the
   % results are reported in 100 output prefiles, each containing the family
   % structure of the input prefile repeated 50 times, together with the
   % simulated genotypes. If the input prefile is named xx.pre, the created
   % prefiles are named xx.pre.001, xx.pre.002, ...., xx.pre.100.
   % SIMULATE requires that Linkage style input format be used. To simulate
   % interference use the interf:nu_male/nu_female option. nu_male is the
   % male interference parameter and nu_female the female one. nu = 1 is no
   % interference, nu < 1 negative interference and nu > 1 is positive
   % interference (see Broman and Weber (2000)).

% Undocumented and unsupported options: 
% _____________________
% The following options are not documented in the `Allegro User Manual'.
% Some of them might be useful to some users

WRITEPROBFILES on    % The program creates probs and nullprobs files with
                     % ...
                     % (alternative to `on' is `off', which is the default).
                     % Files are created for all pt-scoringfunction combinations
                     % that appear in `MODEL spt/mpt lin/exp ...'  lines. The
                     % MODEL examples above will create probpairs.mpt,
                     % proball.mpt, proball.spt, probhomoz.spt, nullpairs.dat,
                     % nullall.dat and nullhomoz.dat

NULLFILENAME pairs nullpairs.dat     % Use these options to specify the names
PROBFILENAME spt pairs probpairs.spt % of the files used by WRITEPROBFILES
PROBFILENAME mpt pairs probpairs.mpt % (default names are shown)

FOUNDERCOUPLES off   % The default is `on'. Turning off founder couple
                     % reduction slows down the program approximately by a
                     % factor of 2. The main reasons to do that would be to
                     % do timing experiments or to locate programming errors

MARKERDISTZERO off   % Use `on' to allow markers to be placed in same position

CHECKCONDITION 50    % Check condition of normalizing constants every 50
                     % (the default) iterations

LODEXACTP on         % Calculate exact p-value for the exponential model

NPLPFILE on          % Write table of exact p-values for NPL-score to disk

MONTECARLO [markers:markerfile] n mhaplo.out mihaplo.out mfounder.out minher.out
          % Simulate inheritance vector paths given the genotype
          % data .... (documentation to be finished)
HAPLOTYPEFIXBITS on % Stops the bits that are fixed because of symmetry from
                    % being permuted in haplotype simulation and estimation
HAPASSHAPLOTYPES on % Haplotypes simulated by montecarlo are compressed for
                    % use with the hapass program
COMPRESSHAPLOTYPES on % A simple compression format of the Monte Carlo
                      % simulation output

% Prints information for the hapass program for all markers in markerfile
ALLHAPLOTYPES [markers:markerfile] allhaplos.out

ALLOWINCONSISTENT inconsistent.out % Inconsistent genotypes at a marker no
                                   % longer cause fatal errors, but the marker
                                   % is treated as missing for the inconsistent
                                   % family. Inconsistent marker/family pairs
                                   % are written to inconsistent.out

WARNONMULTIPLEMAXIMA on % Warn if multiple maxima in maximization
                        % This used to be the default

PRINTORIGINALALLELES [unknownallele=-999999 [shift=0]] % Print alleles in
                                             % genotype file instead of
                                             % allegro's internal alleles

XOVERPRECISION 3 % Set precision in xoverrate column to 3

CALCULATELIKELIHOOD likelihood.out

% QTL options:
%-------------
TRAITFILE traitvalues.trt % A file containing quantitative trait values

ALLOWNEGATIVEVC on % Allows the maximizing variance to be negative

MODEL mpt varcomp shared:0.3 pihat varcomp.0.3.pihat.mpt
MODEL mpt varcomp shared:max admix varcomp.max.admix.mpt
%%MODEL mpt varcomp correlations.dat pihat varcomp.correlations.dat.pihat.mpt

MODEL mpt condlik shared:0.3 condlik.0.3.mpt
MODEL mpt condlik shared:max condlik.max.mpt
%%MODEL mpt condlik correlations.dat condlik.correlations.dat.mpt

% The following scoring functions are available (see paper for details):
%
% QTLwpc                 Commanges scoring function
% QTLhe        corr:0.3 var:0.1  Scoring function based on the HE test
% QTLnhe       corr:0.3  Scoring function based on the NHE test
%
% The following scoring functions are based on normal distribution variance
% components modelling. G is a polygenic/environmental component, a is an
% additive component and d is a dominance component.
%
% QTLscore     corr:0.3  Score statistic used as scoring function
% QTLpairs     VC:G/a/d  Based on treating pairs independently and maximising
%                        the likelihood ratio statistic
% QTLgenpairsncp VC:G/a/d  Based on treating pairs independently and maximising
%                        the non-centrality parameter
% QTLfamily    VC:G/a/d  Based on analysing families as a whole and maximising
%                        the likelihood ratio statistic
% QTLfamilyncp VC:G/a/d  Based on analysing families as a whole and maximising
%                        the non-centrality parameter
%
% Examples of use (the default weighting scheme for QTLs is power:1):
MODEL mpt exp QTLscore cor:0.2
MODEL mpt lin qtlfamily VC:0.5/0.1/0.1 sigma2_a:0.2 sigma2_d:0

% A file of pairwise scores is created for each scoring function
% format is (score1 is the score if 1 allele is shared IBD, and score2
% is the score if 2 alleles are shared IBD):
%
%  family person1 person2 score1 score2
%
% The names of the files will be scoringfunction.sc (e.g. qtlwpc.sc).
PRINTPAIRSCORES on

% Turns off printing of family results
% This applies to all famfiles except pairwise ibd files and haplotype files.
% If FAMFILES on is given all famfiles will be printed. If no FAMFILES option
% is given only those famfiles explicitly named in the options file will be
% printed.
FAMFILES off

% exp/lin can be replaced with poly:n, where n is an even integer

% The following line runs linkage on the marker given, and drops it from
% the scan.
MODEL mpt marker:D1S100 marker.D1S100.mpt fmarker.D1S100.mpt
MODEL mpt marker:DXS999 X marker.DXS999.mpt fmarker.DXS999.mpt

% Counts number of forced crossovers
FORCEDXOVERS forced.out fforced.out

% Treats the chromosome being analysed as pseudoautosomal
PSEUDOAUTOSOMAL on

% In order to have multiple population allele frequencies use multiple pre-
% and datfiles, e.g.:
PREFILE pre1 pre2 pre3
DATFILE dat1 dat2 dat3
% There must be as many prefiles as there are datfiles. For the families in
% prefile i, the allele frequencies in datfile i are used. The only things read
% from the datfiles after the first one are the allele frequencies.

% Allows for consistent allele mapping using new input
OLDFREQFILE cont.frq

% Allows transmissions from mother to be weighed differently than
% transmissions from father. S = a*t_mm + b*t_mf + c*t_ff, where t_mm is 1
% if the pair shares ibd and 0 otherwise, both getting allele from mother.
MODEL mpt exp ps:a/b/c equal

% A test for association. If weights:wf.dat is given then the weight of
% each person is read from wf.dat, otherwise affecteds are given weight 1
% and unaffected weight -1 (unknowns get weight 0). If the datfreq option
% is used then the allele frequencies given in the datfile/freqfile are
% used otherwise the observed allele frequencies in patients and controls
% are used. Singleton affecteds/controls can be added to the association
% calculations by using the patientfile:/controlfile: options (the format
% of these files is the 4 column genotype file).
MODEL mpt assoc datfreq assoc.mpt fassoc.mpt
MODEL mpt assoc patientfile:patients.gen controlfile:controls.gen
MODEL dpt assoc waff:.01 wunaff:-.001
MODEL spt assoc weights:wf.dat

% Prints family information for allele sharing models. The output contains
% the (unnormalized) family weight, minimum Z score, and maximum Z score.
FAMINFO on

% Prints out distribution of counts of affected descendants of founder
% alleles (used in haplotype association). If the ordered option is used,
% the order of the founder alleles matters (the same order as in the
% HAPLOTYPE/MONTECARLO output).
FOUNDERCOUNT foundercount.out
FOUNDERCOUNT ordered foundercount.out

% The dpt option performs drop-this-marker-point analysis. This is
% similar to multipoint, the difference being that instead of all the
% data being used, all the data except the data from the marker being
% investigated is used
MODEL dpt exp pairs equal exppairs.dpt fexppairs.dpt

% Output families will be the same as the input families. WARNING:
% This may cause parametric analysis to fail or give incorrect results
NOSPLITTING on

% References:
% -----------
% [1] Terwilliger JD, Ott J (1994) Analysis of human genetic linkage, Johns 
%     Hopkins University Press, Baltimore and London
% [2] Kong A, Cox NJ (1997) Allele-sharing models: LOD scores and accurate 
%     linkage tests. Am J Hum Genet 61:1179--1188
% [3] McPeek MS (1999) Optimal allele-sharing statistics for genetic mapping 
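%     using affected relatives. Genet Epidemiol 16:225-249
% [4] (cited above for the scoring functions, the family weights and MAXUNLOOP,
%     but its entry is missing from this list)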

*/
#include "basic.h"
#include "files.h"

// Pick the input string-stream class that ISTRINGSTREAM stands for (it is the
// parameter type of the private Options::*line() handlers below). When the
// build defines HAVE_SSTREAM, <sstream>'s istringstream is used; otherwise the
// code falls back to <strstream>'s istrstream.
// NOTE(review): <strstream> is deprecated in standard C++, so the fallback
// branch is presumably only for very old toolchains - confirm it is still
// needed.
#ifdef HAVE_SSTREAM
#include <sstream>
#define ISTRINGSTREAM istringstream
#else
#include <strstream>
#define ISTRINGSTREAM istrstream
#endif

// One node of a singly linked list of chromosome identifiers.
//   id   - the identifier text the node was constructed with
//   all  - true exactly when that text equals "all" (case-sensitive compare)
//   next - following node; a freshly constructed node has no successor (0)
class Chromolist {
public:
  // Members are initialised in declaration order: id, all, next.
  Chromolist(string i) : id(i), all(i == "all"), next(0) {}
  string id;
  bool all;
  Chromolist *next;
};

// Forward declarations. Simulation is only referred to through a pointer in
// Options, so an incomplete type is enough for it. Infile and Outfile are
// stored by value in Options, which needs their complete definitions; those
// presumably come from "files.h" included above - confirm.
class Simulation;
class Infile;
class Outfile;

// Three-valued switch: besides on and off there is a distinct "not set" state
// (used by Options::famfiles). Enumerators keep their implicit values 0, 1, 2.
enum Optbool {
  OB_NOTSET,  // 0: no value has been given
  OB_ON,      // 1
  OB_OFF      // 2
};

// Settings for one run, exposed as public data members. The constructor takes
// the name of an options file; the keyword syntax of such a file is shown in
// the large comment at the top of this header.
// NOTE(review): wherever a comment below ties a member to an option keyword,
// that link is inferred from the member's name and must be confirmed against
// the definitions of the constructor and the private *line() handlers, which
// are not visible in this header.
class Options {
public:
  Options(const string& optionsfile);

  // Input file names. These are vectors, which fits the documented form
  // `PREFILE pre1 pre2 pre3' / `DATFILE dat1 dat2 dat3' (presumably).
  Stringvector prefile;
  Stringvector datfile;
  Infile stepfile;           // presumably STEPFILE
  Outfile uninformative;     // presumably UNINFORMATIVE
  Outfile inconsistent;      // presumably ALLOWINCONSISTENT
  // Purpose of the next four output files is not visible from this header.
  Outfile rfile;
  Outfile lfile;
  Outfile qfile;
  Outfile pfile;
  Outfile likelihoodfile;    // presumably CALCULATELIKELIHOOD
  Outfile deltaboundfile;    // no matching keyword in the sample file above

  Uint checkcondition;       // presumably CHECKCONDITION (iteration interval)
  
  // New input format (see "New input format" in the sample file above)
  Infile genotypefile;
  Infile mapfile;
  Infile freqfile;
  Infile affectionfile;
  Infile chromosomefile;
  Chromolist *chromolist;    // head of a linked list; ownership not visible here
  Float controlprior;
  bool addendmarkers;
  Infile oldfreqfile;

  // QTL
  Infile traitvaluefile;     // presumably TRAITFILE
  bool printpairscores;
  
  // Stepping and FFT control
  Uint steps;
  Uint maxunloop;
  Uint maxlocusforoutput;    // no matching keyword in the sample file above
  Float maxsteplength;

  // Boolean switches. Several share a name with an on/off keyword in the
  // sample file (e.g. NPLEXACTP, ENTROPY, SEXSPECIFIC); others (calc*, do*,
  // skipfirstphase, ...) have no keyword of the same name and are presumably
  // derived while parsing MODEL and similar lines - confirm.
  bool readprobfiles;
  bool writeprobfiles;
  bool calcspt;
  bool calcmpt;
  bool calcdpt;
  bool calcrpt;
  bool dohaplo;
  bool doforcedrecomb;
  bool skipfirstphase;
  bool foundercouples;
  bool nplexactp;
  bool lodexactp;
  bool entropy;
  bool sexspecific;
  bool nplpfile;
  bool sexlinked;
  bool markerdistzero;
  bool montecarlo;
  bool haplotypefixbits;
  bool hapasshaplotypes;
  bool compresshaplotypes;
  bool warnonmultiplemaxima;
  bool printoriginalalleles;
  bool pseudoautosomal;
  // Three-valued rather than bool: the FAMFILES documentation above describes
  // different behaviour for `on', `off' and for the option not being given.
  Optbool famfiles;
  bool faminfo;
  // Presumably the `unknownallele' and `shift' arguments of
  // PRINTORIGINALALLELES - confirm.
  int unknownrepeat;
  string unknownrepeatstring;
  int shiftrepeats;
  Uint xoverrateprecision;   // presumably XOVERPRECISION
  // Presumably maximum family / person identifier lengths - confirm.
  Uint maxfamidlen;
  Uint maxperidlen;

  // Three unsigned shorts of random-number seed state (presumably filled in
  // by seedline() from the SEED keyword; the array shape matches what the
  // POSIX *rand48 family takes - confirm).
  unsigned short int seed[3];
  
  bool allownegativevc;
  bool nosplitting;
  
  Unit unit;                 // presumably UNIT (centimorgan / recombination)

  // simulation options (pointer to a type only forward-declared here)
  Simulation *simulation;

  // Swapping and memory limits
  string swapdirname;
  bool swap;
  string inherdistfolder;
  Uint maxmem;
  bool estimatememory;
  
  // list of vectors whose location is to be found
  // NOTE(review): given the name, "vectors" above is presumably meant to be
  // "markers" (cf. `MODEL mpt marker:D1S100 ...') - confirm.
  Stringvector modelmarkers;
private:
  bool traitfilerequired;
  
  // Per-keyword handlers. Each takes a string stream by reference, presumably
  // positioned on the arguments that follow the keyword on its line - confirm.
  void prefileline(ISTRINGSTREAM& ss);
  void datfileline(ISTRINGSTREAM& ss);
  void xoverline(ISTRINGSTREAM& ss);
  void uninformativeline(ISTRINGSTREAM& ss);
  void allowinconsistentline(ISTRINGSTREAM& ss);
  void modelline(ISTRINGSTREAM& ss);
  void nullfileline(ISTRINGSTREAM& ss);
  void probfileline(ISTRINGSTREAM& ss);
  void simulateline(ISTRINGSTREAM& ss);
  void haplotypeline(ISTRINGSTREAM& ss);
  void montecarloline(ISTRINGSTREAM& ss);
  void forcedxoversline(ISTRINGSTREAM& ss);
  void chromosomesline(ISTRINGSTREAM& ss);
  void pairwiseibdline(ISTRINGSTREAM& ss);
  void printoriginalallelesline(ISTRINGSTREAM& ss);
  void famfilesline(ISTRINGSTREAM& ss);
  void foundercountline(ISTRINGSTREAM& ss);
  void allhaploline(ISTRINGSTREAM& ss);
  void seedline(ISTRINGSTREAM &ss);
};

// Program-wide pointer to the Options object; declared here, defined in some
// other translation unit.
extern Options *options;

// Declared here, defined elsewhere. Presumably converts option text to a
// number; the roles of `s' and `t' are not visible from this header - confirm
// against the definition.
Double readnumericoption(const string &s, const string &t);

#endif // _OPTIONS
