#define _CRT_SECURE_NO_WARNINGS

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "PileupReader.h"

// ISNUM(a): true when the character `a` lies in the range '0'..'9'.
// The argument is evaluated twice, so do not pass an expression with side effects.
#define ISNUM( a ) ( ((a)>='0') && ((a)<='9') )



// Constructs a reader with no file attached.
// Every member this file reads is given a defined value, and the three per-read
// buffers are allocated and set to empty strings, so that diagnostics such as
// PrintLine() never look at indeterminate data before the first line is parsed.
PileupReader::PileupReader(void) {
	// File and bookkeeping state.
	_fileName		= "";
	_fin			= NULL;
	_lineCount		= 0;
	_allLineCount	= 0;
	_lineValid		= false;
	_debug_LinesRead = 0.0;
	_debug_LinesDiscarded = 0.0;

	// Fields describing the current line; 'N' is used as a placeholder base.
	_seqName			= "";
	_seqPos				= 0;
	_refBase			= 'N';
	_concensusBase		= 'N';
	_concensusQuality	= 0;
	_snpQuality			= 0;
	_maxMappingQuality	= 0;
	_numReads			= 0;
	_numReadsTrimmed	= 0;

	// Buffers start unallocated; -1 guarantees the first capacity request allocates.
	_stringBufSize	= -1;
	_nucsRead		= NULL;
	_nucsQuality	= NULL;
	_alignQuality	= NULL;
	_readBuffer		= NULL;
	EnforceBufferCapacity( 64000 );

	// Start with empty strings (skipped if the allocation did not succeed).
	if (_nucsRead		!= NULL) _nucsRead[0]		= 0;
	if (_nucsQuality	!= NULL) _nucsQuality[0]	= 0;
	if (_alignQuality	!= NULL) _alignQuality[0]	= 0;
	if (_readBuffer		!= NULL) _readBuffer[0]		= 0;
}

// Closes any open input and releases the four heap buffers.
PileupReader::~PileupReader(void) {
	FileClose();

	// free() accepts NULL, so no per-pointer guard is required.
	free( _nucsRead );
	free( _nucsQuality );
	free( _alignQuality );
	free( _readBuffer );

	_nucsRead		= NULL;
	_nucsQuality	= NULL;
	_alignQuality	= NULL;
	_readBuffer		= NULL;
}

// Removes, in place, every read whose base is 'N' or whose base-quality or
// alignment-quality character is not greater than '!'.
// The surviving reads are compacted to the front of the three parallel buffers,
// which are then null terminated, and _numReads / _numReadsTrimmed are updated.
//
// NumTrimmed : counter array; the slot BaseToIndex(base) is incremented for each
//              read that is dropped.
// Returns the number of reads removed.
unsigned long
PileupReader::TrimInvalid( double *NumTrimmed ) {
	const unsigned long total = _numReads;
	unsigned long kept = 0;

	for (unsigned long src = 0; src < total; ++src) {
		const char base		= _nucsRead[src];
		const char base_q	= _nucsQuality[src];
		const char map_q	= _alignQuality[src];

		// Reject the read if any one of the three checks fails.
		if ( (base == 'N') || (base_q <= '!') || (map_q <= '!') ) {
			NumTrimmed[ BaseToIndex( base ) ]++;
			continue;
		}

		// Keep it: slide it down to the next free slot.
		_nucsRead[kept]		= base;
		_nucsQuality[kept]	= base_q;
		_alignQuality[kept]	= map_q;
		++kept;
	}

	// Terminate the shortened strings.
	_nucsRead[kept]		= 0;
	_nucsQuality[kept]	= 0;
	_alignQuality[kept]	= 0;

	// Record how many reads were dropped and the new read count.
	const unsigned long removed = total - kept;
	_numReadsTrimmed	= (long) removed;
	_numReads			= kept;
	return( removed );
}

// Ensures the per-read buffers can hold at least NewSize characters each and the
// raw read buffer at least 10*NewSize (an input field may carry indel text and
// markers that are discarded during processing, so it is buffered with headroom).
// Buffers only ever grow.
//
// Returns true when the capacity is available. Returns false, leaving every
// existing buffer valid and _stringBufSize unchanged, when NewSize is not
// positive, when 10*NewSize would overflow, or when an allocation fails.
bool
PileupReader::EnforceBufferCapacity( long NewSize ){
	if (NewSize <= _stringBufSize) return( true );
	if (NewSize < 1) return( false );
	if (NewSize > (LONG_MAX / 10)) return( false );		// 10*NewSize must not overflow

	// realloc() leaves the old block untouched on failure, so each pointer is
	// replaced only once its own reallocation has succeeded.
	char *grown;

	grown = (char *) realloc( _nucsRead, (size_t) NewSize );
	if (grown == NULL) return( false );
	_nucsRead = grown;

	grown = (char *) realloc( _nucsQuality, (size_t) NewSize );
	if (grown == NULL) return( false );
	_nucsQuality = grown;

	grown = (char *) realloc( _alignQuality, (size_t) NewSize );
	if (grown == NULL) return( false );
	_alignQuality = grown;

	grown = (char *) realloc( _readBuffer, (size_t) (10 * NewSize) );
	if (grown == NULL) return( false );
	_readBuffer = grown;

	_stringBufSize	= NewSize;
	return( true );
}



// Detaches the reader from its input.
// A real file is closed with fclose(); stdin is left open and merely forgotten.
// Returns true if nothing was open or the close succeeded, false if fclose()
// reported an error. The reader is detached from the stream in either case.
bool
PileupReader::FileClose(void) {
	if (!IsFileOpen()) return( true );

	bool closed_ok = true;
	if ( _fin != stdin ) closed_ok = ( fclose( _fin ) == 0 );
	_fin = NULL;
	return( closed_ok );
}
	
bool
PileupReader::FileOpen(const std::string &NewFileName) {
	if (!IsFileOpen()) FileClose();
	if ((NewFileName == "") || (NewFileName == "-") ) {
		_fin = stdin; 
	} else { 
		_fin = fopen( NewFileName.c_str(),"r");
	};

	if (_fin == NULL) return( false);
	_fileName		= NewFileName;
	_lineCount		= 0;
	_allLineCount	= 0;
	_lineValid		= false;
	return( true );
}

// Reports a fatal error on stdout and terminates the process.
// With assertions enabled the assert stops execution (useful under a debugger);
// otherwise the process exits with status -1.
void
PileupReader::MyAbort( void ){
	printf("Critical Error!\n");
	printf("Aborting Now!\n");
	// A failed assert may terminate without flushing stdio buffers, so push the
	// messages out first.
	fflush( stdout );
	fflush( stderr );
	assert(1==2);  //-- for debugging.
	exit( -1 );
}

bool
PileupReader::FetchNextLine( bool &HitEOF ) {
	char	*buf = _readBuffer;
	bool	reading_header;
	long	num_read;
	bool	ok;
	char	ref_base;
	unsigned long	seq_pos = 0;

	HitEOF = false;
	if (!IsFileOpen()) return( false );

	// Invalidate current state...
	_lineValid			= false;
	_numReads			= 0;
	_numReadsTrimmed	= 0;

	reading_header = true;
	while (reading_header) {
		num_read = fscanf(_fin," %s %lu %c",buf,&seq_pos, &ref_base);
		if ( num_read != 3) {
			// This is normal, we just read past end of file....
			if (feof(_fin)) { FileClose(); HitEOF = true; return( false ); };
			// Otherwise .... this is an error
			fprintf(stderr,"ERR: fscanf-1\n NumRead = %ld (3)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
			this->PrintLine( stderr );
			MyAbort();
			return( false );}
		_allLineCount++;
		_debug_LinesRead++;
		reading_header = (ref_base == '*') || ( (strcmp(buf,_seqName.c_str())==0) && (seq_pos==_seqPos) );
		// if we get a repeat chromosome name and line number, then it is an INdel line, we don't deal with these yet, so skip it 
		//		and go on to the first line with a different identifier...
		if (reading_header) {
			_debug_LinesDiscarded++;
			(void) fgets(buf,_stringBufSize,_fin); }
		// not an error, the last line was jsut an indel line...
		if (feof(_fin)) {
			FileClose(); HitEOF = true; return( false ); };
	}


	_seqName	= buf;
	_seqPos		= seq_pos;
	num_read	= fscanf(_fin," %c %ld %ld %ld %ld ",&_concensusBase,&_concensusQuality,&_snpQuality,&_maxMappingQuality,&_numReads);
	// Check for error...
	if (num_read != 5) { 
		// Otherwise .... this is an error
		fprintf(stderr,"ERR: fscanf-2\n NumRead = %ld (5)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		this->PrintLine( stderr );
		MyAbort(); return( false );}
	_refBase	= toupper( ref_base );
	EnforceBufferCapacity(_numReads+4 );
	// Incase of a realloc... fetch this again.....
	buf = _readBuffer;

	// Process Nucleotides: Now the fun part
	num_read = fscanf(_fin," %s ",buf);
	if (num_read != 1) { 
		fprintf(stderr,"ERR: fscanf-3\n NumRead = %ld (1)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		this->PrintLine( stderr );
		MyAbort(); return( false );}
	ok = ProcessNucsRead( buf );
	if (!ok) { 
		fprintf(stderr,"ERR: fscanf-3a - Nuc Reads\n NumRead = %ld (1)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		MyAbort(); return( false );}

	// Process Read Qualities: More Fun
	num_read = fscanf(_fin," %s ",buf);
	if (num_read != 1) { 
		fprintf(stderr,"ERR: fscanf-4\n NumRead = %ld (1)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		this->PrintLine( stderr );
		MyAbort(); return( false );}
	ok=ProcessNucsQuality( buf );
	if (!ok) { 
		fprintf(stderr,"ERR: fscanf-4a - Nuc Quality\n NumRead = %ld (1)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		MyAbort(); return( false );}

	// Process Alignment Qualities: YET more fun...
	num_read = fscanf(_fin," %s ",buf);
	if (num_read != 1) { 
		fprintf(stderr,"ERR: fscanf-5\n NumRead = %ld (1)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		this->PrintLine( stderr );
		MyAbort(); return( false );}
	ok = ProcessAlignQuality( buf );
	if (!ok) { 
		fprintf(stderr,"ERR: fscanf-5a - Align Quality\n NumRead = %ld (1)\n%lu - last line #\nLast Line: \n",(long)num_read,_allLineCount);
		MyAbort(); return( false );}
	
	// Whew! Made It
	_lineCount++;
	_lineValid = true;

	return( true );
}


// Null-terminates Bases just after the characters stored so far (never beyond
// index Expected), so that a diagnostic dump of a partly parsed line prints a
// bounded string. Does nothing when Bases is NULL or Expected is negative.
static void
TerminateStoredBases( char *Bases, long Counted, long Expected ) {
	if ((Bases == NULL) || (Expected < 0)) return;
	Bases[ (Counted < Expected) ? Counted : Expected ] = 0;
}

// Converts the read-bases field of a record into one upper-case base per read,
// stored in _nucsRead.
//
//   . ,        -> the reference base
//   A C G T N  -> that base (case is folded; strand is ignored for now)
//   *          -> 'N' (the read has a deletion here, so there is no base)
//   ^x         -> start-of-read marker plus the character after it, both skipped
//   $          -> end-of-read marker, skipped
//   +n... -n...-> indel: the number and the n characters after it are skipped
//
// ReadBuffer must be a null-terminated string.
// Returns true when exactly _numReads bases were produced. Any other outcome
// (unknown character, negative indel length, wrong base count) is reported on
// stderr, calls MyAbort() and returns false.
// Bases beyond _numReads are counted but never stored, so a field that is
// longer than declared cannot overrun _nucsRead.
bool
PileupReader::ProcessNucsRead(char *ReadBuffer) {
	const long len = (long ) strlen(ReadBuffer);
	long pos_out = 0;		// bases seen so far; may exceed _numReads on bad input
	long pos_in;

	for (pos_in=0; pos_in<len; pos_in++) {
		// toupper() requires a value representable as unsigned char.
		const char c = (char) toupper( (unsigned char) ReadBuffer[pos_in] );
		char base;
		switch( c ) {
			// By far the most common
			case '.' :
			case ',' : base = _refBase; break;
			// Not uncommon
			case '^' : pos_in++; continue;		// also skip the character following the marker
			case '$' : continue;
			case 'A' :
			case 'C' :
			case 'G' :
			case 'T' :
			case 'N' : base = c; break;
			// Uncommon
			case '*' : base = 'N'; break;
			case '+' :
			case '-' :
				{
					// Versions up to V2.14 only understood indel lengths of at most
					// two digits; the length is now parsed as a full decimal number.
					char *start_pos = &(ReadBuffer[pos_in+1]), *end_pos = NULL;
					const long remaining	= len - pos_in - 1;		// characters after the marker
					const long indel_len	= strtol( start_pos, &end_pos, 10);
					const long digits		= (long) (end_pos - start_pos);
					if (indel_len < 0) {
						// A negative length would move the cursor backwards.
						TerminateStoredBases( _nucsRead, pos_out, (long)_numReads );
						fprintf(stderr,"PNucRead, Bad indel length %ld at %ld\n LastLine:\n",(long)indel_len,(long)pos_in);
						this->PrintLine( stderr );
						MyAbort();
						return( false );
					}
					// Skip the digits and the indel text, stopping at the end of the
					// field if the declared length runs past it.
					if (indel_len > (remaining - digits)) {
						pos_in += remaining;
					} else {
						pos_in += (digits + indel_len);
					}
				}
				continue;
			default:
				// Should never happen: unknown characters are errors.
				TerminateStoredBases( _nucsRead, pos_out, (long)_numReads );
				fprintf(stderr,"PNucRead, Unknown CHar %ld %ld %c\n LastLine:\n",(long)pos_in,(long)ReadBuffer[pos_in],c);
				this->PrintLine( stderr );
				MyAbort();
				return( false );
		}
		// Store only while there is room for the declared number of reads.
		if (pos_out < _numReads) _nucsRead[pos_out] = base;
		pos_out++;
	}

	if (pos_out != _numReads ) {
		TerminateStoredBases( _nucsRead, pos_out, (long)_numReads );
		fprintf(stderr,"PNucRead, pos_out != numReads %ld %ld\n",(long)pos_out,(long)_numReads);
		this->PrintLine( stderr );
		MyAbort();
		return( false ); }

	_nucsRead[_numReads] = 0;	// Null terminate it.

	return( true );
}

// Stores the base-quality field of a record in _nucsQuality.
// ReadBuffer must be a null-terminated string holding exactly one character per
// read; any other length is reported on stderr, calls MyAbort() and returns false.
// No checking of the individual quality characters is done here.
bool
PileupReader::ProcessNucsQuality(char *ReadBuffer) {
	const long field_len = (long ) strlen(ReadBuffer);

	if (field_len == _numReads ) {
		// Lengths agree: copy the characters and terminate the stored string.
		memcpy(_nucsQuality,ReadBuffer,_numReads);
		_nucsQuality[_numReads] = 0;
		return( true );
	}

	fprintf(stderr,"PNucQual, pos_out != numReads %ld %ld\n",(long)field_len,(long)_numReads);
	this->PrintLine( stderr );
	MyAbort();
	return( false );
}

// Stores the alignment-quality field of a record in _alignQuality.
// ReadBuffer must be a null-terminated string holding exactly one character per
// read; any other length is reported on stderr, calls MyAbort() and returns false.
// No checking of the individual quality characters is done here.
bool
PileupReader::ProcessAlignQuality(char *ReadBuffer) {
	const long field_len = (long ) strlen(ReadBuffer);

	if (field_len == _numReads ) {
		// Lengths agree: copy the characters and terminate the stored string.
		memcpy(_alignQuality,ReadBuffer,_numReads);
		_alignQuality[_numReads] = 0;
		return( true );
	}

	fprintf(stderr,"PNucAQual, pos_out != numReads %ld %ld\n",(long)field_len,(long)_numReads);
	this->PrintLine( stderr );
	MyAbort();
	return( false );
}

	
// Copies the current line's per-read data into the caller's variables:
// the read bases, their quality string, their alignment-quality string and the
// reference base. No quality for the reference base is returned; this file
// does not provide one.
void	
PileupReader::GetBases( string &ShortReads, string &ShortReadQual, string &ShortReadAlnQual, char &ReferenceRead ) const {
	ReferenceRead = _refBase;
	ShortReads.assign( _nucsRead );
	ShortReadQual.assign( _nucsQuality );
	ShortReadAlnQual.assign( _alignQuality );
}


// Copies the consensus call stored for the current line into the caller's
// variables: consensus base, consensus quality, SNP quality and the stored
// mapping-quality value. Always returns true.
// NOTE(review): RmsMapQuality is filled from _maxMappingQuality - confirm which
// of the two names describes the value correctly.
bool	
PileupReader::GetMAQCalls( char &ConcensusCharacter, long &ConcensusQuality, long &SnpQuality, long &RmsMapQuality ) const {
	RmsMapQuality = _maxMappingQuality;
	SnpQuality = _snpQuality;
	ConcensusQuality = _concensusQuality;
	ConcensusCharacter = _concensusBase;
	return( true );
}

// Writes the current record to F as one tab-separated line: sequence name,
// position, reference base, consensus base, consensus quality, SNP quality,
// max mapping quality, read count, read bases, base qualities and alignment
// qualities.
// Returns false if F is NULL or any write fails, true otherwise. All three
// parts are attempted even if an earlier one fails. A buffer that has not been
// allocated is printed as an empty field.
bool
PileupReader::PrintLine( FILE *F ) const {
	if (F == NULL) return( false );

	// Passing NULL for %s is undefined, so substitute empty strings.
	const char *reads		= (_nucsRead		!= NULL) ? _nucsRead		: "";
	const char *read_qual	= (_nucsQuality		!= NULL) ? _nucsQuality		: "";
	const char *align_qual	= (_alignQuality	!= NULL) ? _alignQuality	: "";

	bool wrote_ok = true;
	if (fprintf(F,"%s\t%lu\t%c",_seqName.c_str(),_seqPos,_refBase) < 0) wrote_ok = false;
	if (fprintf(F,"\t%c\t%ld\t%ld\t%ld\t%ld",_concensusBase,_concensusQuality,_snpQuality,_maxMappingQuality,_numReads) < 0) wrote_ok = false;
	if (fprintf(F,"\t%s\t%s\t%s\n",reads,read_qual,align_qual) < 0) wrote_ok = false;
	return( wrote_ok );
}
 
