#pragma once

#ifndef _SHORTREADGENOTYPE_H_DEFINED
#define _SHORTREADGENOTYPE_H_DEFINED

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Using-declarations stay ahead of the project header so the unqualified names
// remain visible at every point where they were visible before.
using std::pair;
using std::string;
using std::vector;

#include "LogProb.h"


class IndexRecord {
public:
	struct IndexRecordItem {
		const char	*SourceArray;
		long		index; };
private:
	long			_numAllocated;
	long			_numActive;
	IndexRecordItem	*_items;
	char			*_bufNucs;
	char			*_bufQualRead;
	char			*_bufQualAlign;
	bool			InsureSize( long NewSize );
public:
	IndexRecord()	{ _numAllocated=0; _numActive=0; _items=NULL; _bufNucs = _bufQualRead = _bufQualAlign = NULL; };
	~IndexRecord()	{ _numAllocated=0; _numActive=0; InsureSize( -1 ); };
	bool	Initialize( long NumItems, const char *DataSource );
	bool	Sort( bool Ascending , const char *Nucs, const char *ReadQual, const char *AlignQual );
	void	FetchBuffs( const char * &Reads, const char * &ReadQual, const char * &AlignQual ) {Reads = _bufNucs; ReadQual = _bufQualRead; AlignQual=_bufQualAlign; return; }
	inline long	FetchIndex( long Index )	{ return( _items[Index].index ); };
	inline long NumActive( void )			{ return( _numActive ); };
};

class ShortReadGenotype {
private:
	// proirs and state nam,es (labels) shared among all loci
	vector<string>		_diploidCombos;
	vector<double>		_diploidPriors;
	vector<LogProb>		_diploidPriorsLP;	// Cache these in LP format for computational efficency
	vector<string>		_haploidCombos;
	vector<double>		_haploidPriors;
	vector<LogProb>		_haploidPriorsLP;	// Cache these in LP format for computational efficency

	// These are temporary variables used to process each genetic loci (position on the genome)
	// NodeNucEff			_ShortReadData;
	// NodeNucEff			_HG18Read;
	// vector<StateSet>	_CurrentReads;
	// StateSet			_CondProbs;
	long				_totalReads[5];			// each character ACGTN
	long				_unvoidedReads[5];		// Each non-N character with alignment quality > 0 && seq quality > 0
	double				_partialReads[5];		// the effective number of non-N unvoided reads (Sum(1-AlignQuality)) for all ReadQuality > 0
	double				_effectiveReads[5];		// the effective number of non-N 'perfect' reads (Sum(1-Read Quality) * (1-AlignQuality)))
	double				_effectiveReadsSum;		// the effective number of non-N 'perfect' reads (Sum(1-Read Quality) * (1-AlignQuality)))
	//
	vector< pair<double,long> >	_sortedStates;	// Used to order states by highest posterior probability, for eventual output.

	// Values used in calculating data probability and posterior genotype probability
	// Cant use an array in a vector, have to use a struct to wrap the array. Sigh.
	struct NucStates {
		double v[5]; };
	vector< NucStates >	_readStates;		// probability of a given read of being in each state A,C,G,T or N. This is just a buffer.
	LogProb				_prob_DataGivenGenotype[10];
	LogProb				_prob_DataAndGenotype[10];
	double				_prob_GenotypeGivenData[10];
	LogProb				_prob_Data;
	bool				_hasSortedStates;
	IndexRecord			_qualSorter;

 
	long	CharToIndex( char a );
	void	SetDiploPriors( void );

	void	SetReadStates( long NumReads, const char *Reads, const char *ReadQual, const char *AlignQual );
	void	correlated_err_adjust( NucStates *StatesIn, NucStates *StatesOut, double *ThetaAdj, double &Theta, double &ThetaMin, int ReadChar, bool DoAdj );

	static double	_errorDistUnknown[4][4];
	static double	_errorDistIllumina[4][4];
	static double	_errorDistSanger[4][4];
	static double	_errorDist454[4][4];
	static double	_errorDistSolid[4][4];
	double			(*_errDist)[4][4];

public:
	ShortReadGenotype(void) {};
	virtual ~ShortReadGenotype(void) {};
	static void		GetHaploidPriors( vector<string> &StateNames, vector<double> &HaploidPriors, double Phi );
	static void		GetDiploidPriors( vector<string> &StateNames, vector<double> &DiploidPriors, double Phi, double Kappa , double P );
	static bool		ReadPriors( const string &FileName, vector<string> &HaploidNames, vector<double> &HaploidPriors, vector<string> &DiploidNames, vector<double> &DiploidPriors );
	static double	QualCharToReal( char A );
	static double	QualScoreToReal( long A );
	static long		NucCharToIndex( char A );
	static long		GetHomozygousDiploidIndex( long NucleotideIndex );
	static long		GetSNPClass( long RefHaploid, long SNPDiploid );
	bool Init( vector<string> DiploidCombos, vector<double> DiploidPriors, vector<string> HaploidStates, vector<double> HaploidPriors, int SequencingTechnology );
	bool SetData( const string &ShortReads, const string &ShortReadNucQuality, const string &ShortReadAlignQuality, double Theta, double ThetaCutoff, bool SortAscending );
	// bool ResizeStates( long StateSetSize );
	//LogProb GetPData();
	double	PDataNLL()						const { return( -_prob_Data.Log10() ); };
	double	GetPosterior( int i )			const { return( _prob_GenotypeGivenData[i] ); };
	double	GetCondLikelihoodNLL( int i )	const { return( -_prob_DataGivenGenotype[i].Log10()); };
	long	ReadsTotal()					const { long sum=0; for (int i=0;i<5;i++) sum+=_totalReads[i]; return( sum ); };	// _unvoidedReads
	long	ReadsEffective()				const { long sum=0; for (int i=0;i<5;i++) sum+=_unvoidedReads[i]; return( sum ); }; 
//	double	ReadsAmortized()				const { return( _effectiveReads ); };
	double	HapPrior( int i )				const { return( _haploidPriors[i] ); }; 
	string	HapName( int i )				const { return( _haploidCombos[i] ); }; 
	double	DipPrior( int i )				const { return( _diploidPriors[i] ); }; 
	string	DipName( int i )				const { return( _diploidCombos[i] ); }; 
	double	EffectiveReads()				const { return( _effectiveReadsSum ); };
	void	GatherCountData( double *Total, double *Unvoided, double *Partial, double *Effective ) const;
	void	GetSortedState(long i, long &StateID, double &ProbSgD );
	//string	GetStateName( long StateNum );
	//string	GetPStateGivenData( long StateNum );

};

#endif 
