// Modified for the new seq specs;

#include "nucleotable.h"
 
// Decodes a base-4 tuple index into its textual pattern.
//   index : non-negative tuple index; consumed two bits' worth (one base-4
//           digit) per non-skipped position, starting from the last position.
//   l     : number of positions in the pattern (must be > 0).
//   skip  : per-position code: 0 = position encoded in index, 1 = skipped
//           position (written as '-'), or one of 'a','c','g','t' to copy
//           that letter verbatim. Any other value is reported through err().
//   patt  : output buffer; must hold at least l+1 chars (NUL terminated here).
void index_to_tuple(int index, int l, int *skip, char *patt) {
  assert(l>0);
  assert(index>=0);
  assert(patt);

  static const char kBases[] = "acgt";

  patt[l] = '\0';
  int pos = l;
  while (pos-- > 0) {
    const int code = skip[pos];
    if (code == 1) {
      patt[pos] = '-';
    } else if (code == 0) {
      // A negative remainder (only possible when asserts are disabled and
      // index is negative) maps to 'X'.
      const int digit = index % 4;
      patt[pos] = (digit >= 0) ? kBases[digit] : 'X';
      index /= 4;
    } else if (code == (int)'a' || code == (int)'c' ||
               code == (int)'g' || code == (int)'t') {
      patt[pos] = code;
    } else {
      err("Invalid SKIP");
    }
  }
}


int tuple_to_index(int pos, int l, Sequence& seq, int *skip) {
  
  int factor=1; int sum=0;
  for (int i=l-1; i>=0; --i) {

    if (skip[i]) continue;
    
    if (seq[pos+i]>3 || seq[pos+i]<0) return -1;
    sum+=((int)seq[pos+i])*factor;
    factor*=4;
  }
  return sum;
}

// Encodes array[0..l-1] (values 0..3) as a base-4 index, ignoring every
// position whose skip[] entry is non-zero. The last non-skipped position is
// the least significant digit. Returns -1 if any non-skipped value is
// outside 0..3.
int tuple_to_index(int l, int *array, int *skip) {
  int code = 0;
  for (int k = 0; k < l; ++k) {
    if (skip[k] != 0) {
      continue;
    }
    const int digit = array[k];
    if (digit < 0 || digit > 3) {
      return -1;
    }
    // Horner accumulation, most significant digit first.
    code = code * 4 + digit;
  }
  return code;
}

// Same encoding as tuple_to_index(l, array, skip): base-4 index of the
// non-skipped entries of array[0..l-1], or -1 on a value outside 0..3.
int tup_to_index(int l, int *array, int *skip) {  // temporary function, MUST REMOVE;
  int total = 0;
  int weight = 1;

  int k = l;
  while (k-- > 0) {
    if (skip[k]) {
      continue;
    }
    const int digit = array[k];
    if (digit < 0 || digit > 3) {
      return -1;
    }
    total += digit * weight;
    weight *= 4;
  }
  return total;
}

// Builds an empty table: all lengths are zero and every array pointer is
// NULL until parse() fills the table in.
TupleTable::TupleTable() {
  tuple_length = true_length = table_length = 0;
  // The original chain ended in ranksI, which left freqsI uninitialised.
  freqs0 = freqs1 = freqs2 = freqsI = NULL;
  ranks0 = ranks1 = ranks2 = ranksI = NULL;
  skips = NULL;
}

// Destructor; currently releases nothing.
// NOTE(review): parse() allocates skips, freqs0, freqs1, freqs2 and freqsI
// with new[], and none of them is deleted here. Before adding delete[] calls,
// confirm that every constructor initialises these pointers and that
// TupleTable objects are never copied by value.
TupleTable::~TupleTable() {
}


// Builds a table by reading its textual form from fin (see parse()).
TupleTable::TupleTable(istream &fin) {
  tuple_length = true_length = table_length = 0;
  // The original chain ended in ranksI, which left freqsI uninitialised
  // until parse() assigned it.
  freqs0 = freqs1 = freqs2 = freqsI = NULL;
  ranks0 = ranks1 = ranks2 = ranksI = NULL;
  skips = NULL;

  this->parse(fin);
}

// Scans seq[0..length-1] once, keeping a rolling base-4 code of the last
// tuple_length characters, and accumulates per-frame scores into the
// caller-provided arrays fprob and minFProb (slots 0..2 are updated, selected
// by i%3, (i+1)%3 and (i+2)%3). Neither array is cleared here; values are
// added to whatever the caller passed in.
// NOTE(review): this routine uses tuple_length directly and does not consult
// skips -- presumably intended for tables without skipped positions; confirm.
void TupleTable::fastTupleProb(char* seq, int length, double* fprob, double* minFProb) {
  int index=0;
  // Keeps the lowest 2*tuple_length bits, i.e. tuple_length base-4 digits.
  int mask=(1<<(tuple_length<<1))-1;
  double total;

  for (int i=0;i<length;i++) {
    switch(seq[i]) {
    // Deliberate fall-through: 't' adds 3, 'g' adds 2, 'c' adds 1, 'a' adds 0
    // to the low digit of index (which is zero on entry).
    case 't': index++; 
    case 'g': index++;
    case 'c': index++;
    case 'a': {
      // Only score once at least tuple_length characters have been scanned.
      if (i>=tuple_length-1) {
	total=freqs0[index]+freqs1[index]+freqs2[index];
	// Share of each frame's count among the three frame counts; a zero
	// count contributes 0 (and avoids dividing when total is 0).
	fprob[i%3]+=(freqs0[index]?(((double)(freqs0[index]))/total):0);
	fprob[(i+1)%3]+=(freqs2[index]?(((double)(freqs2[index]))/total):0);
	fprob[(i+2)%3]+=(freqs1[index]?(((double)(freqs1[index]))/total):0);
	// Reciprocal of each count, with the count floored at SMALL_VALUE.
	minFProb[i%3]+=1/MAX((double)freqs0[index],SMALL_VALUE);
	minFProb[(i+1)%3]+=1/MAX((double)freqs2[index],SMALL_VALUE);
	minFProb[(i+2)%3]+=1/MAX((double)freqs1[index],SMALL_VALUE);
      }
      // Make room for the next base and drop the oldest one.
      index=(index<<2)&mask;
      break;
    }
    // Any other character: jump ahead by tuple_length (plus the loop's i++).
    // NOTE(review): index is not reset here, so digits scanned before the
    // unknown character remain in the rolling code -- confirm this is intended.
    default: i+=tuple_length;
    }
  }
}


// Returns the frequency of tuple `index` in the given frame as if one
// occurrence had been removed from its count (the total is left unchanged).
//   frame : FR0, FR1, FR2 or INTR; any other value yields 0.
//   index : tuple index, 0 <= index < table_length.
// For FR0/FR1/FR2 a zero count yields 0; a count of 1 yields 1/total rather
// than 0; larger counts yield (count-1)/total.
// For INTR a zero count yields 1/totals[3] instead of 0.
// NOTE(review): the INTR zero-count case differs from the exon frames;
// kept as in the original -- confirm it is intentional.
double TupleTable::tupleProbMinusOne(int frame, int index) {
  assert(frame>=0 && frame <=3);
  assert(index>=0 && index<table_length);

  switch(frame) {
  case FR0:
    if (!freqs0[index])   return 0;
    if (freqs0[index]==1) return (1.0 / (double) totals[0]);
    else                  return ((double)(freqs0[index]-1)) / ((double) totals[0]);
  case FR1:
    if (!freqs1[index])   return 0;
    if (freqs1[index]==1) return (1.0 / (double) totals[1]);
    else                  return ((double)(freqs1[index]-1)) / ((double) totals[1]);
  case FR2:
    if (!freqs2[index])   return 0;
    if (freqs2[index]==1) return (1.0 / (double) totals[2]);
    else                  return ((double)(freqs2[index]-1)) / ((double) totals[2]);
  case INTR:
    if (!freqsI[index])   return (1.0 / (double) totals[3]);
    // Was totals[4]: only totals[0..3] are ever read from or written to the
    // table, and the intron total lives in totals[3].
    if (freqsI[index]==1) return (1.0 / (double) totals[3]);
    else                  return ((double)(freqsI[index]-1)) / ((double) totals[3]);
  default:
    return 0;
  }
}

// Returns count/total for tuple `index` in the requested frame
// (FR0, FR1, FR2 or INTR); any other frame value yields 0.
double TupleTable::tupleProb(int frame, int index) {
  assert(frame>=0 && frame <=3);
  assert(index>=0 && index<table_length);

  // Pick the count array and the matching slot in totals[].
  const int *counts;
  int slot;
  switch(frame) {
  case FR0:  counts = freqs0; slot = 0; break;
  case FR1:  counts = freqs1; slot = 1; break;
  case FR2:  counts = freqs2; slot = 2; break;
  case INTR: counts = freqsI; slot = 3; break;
  default:
    return 0;
  }
  return ((double)(counts[index])) / ((double) totals[slot]);
}

// Add-one frequency of the tuple starting at seq[pos].
//   exon != 0 : (f0+f1+f2+1) / (totals[0]+totals[1]+totals[2]+1)
//   exon == 0 : (fI+1) / (totals[3]+1)
// Returns 0 (after printing a message) when the tuple contains an unknown base.
double TupleTable::tupleProb(int pos, Sequence &seq, int exon) {
  const int index = tuple_to_index(pos,tuple_length,seq,skips);
  if (index==-1) {
    cout << "Unknown base around: " << pos << endl;
    return 0;
  }
  if (index<0) {
    cout << "index less than 0: " << index << endl;
  }
  if (index>=table_length) {
    cout << "index > table_length: " << index << endl;
  }
  assert(index>=0 && index<table_length);

  if (!exon) {
    return ((double)(freqsI[index]+1))/((double)(totals[3]+1));
  }
  const double numerator = (double)(freqs0[index]+freqs1[index]+freqs2[index]+1);
  const double denominator = (double)(totals[0]+totals[1]+totals[2]+1);
  return numerator/denominator;
}

// Frequency of the tuple starting at seq[pos] in the given frame; delegates
// to tupleProb(frame, index). Returns 0 (after printing a message) when the
// tuple contains an unknown base.
double TupleTable::tupleProb(int frame, int pos, Sequence &seq) {
  const int index = tuple_to_index(pos,tuple_length,seq,skips);
  if (index!=-1) {
    if (index<0) {
      cout << "index less than 0: " << index << endl;
    }
    if (index>=table_length) {
      cout << "index > table_length: " << index << endl;
    }
    return tupleProb(frame, index);
  }
  cout << "Unknown base around: " << pos << endl;
  return 0;
}


// Prints the tuple_length entries of skips to standard output, each followed
// by a space, then ends the line.
void TupleTable::print_skips() {
  int k = 0;
  while (k < tuple_length) {
    cout << skips[k] << " ";
    ++k;
  }
  cout << endl;
}

// Writes the tuple_length entries of skips to fout, each followed by a
// space, then ends the line.
void TupleTable::print_skips(ofstream &fout) {
  int k = 0;
  while (k < tuple_length) {
    fout << skips[k] << " ";
    ++k;
  }
  fout << endl;
}

// Reads a table in the textual layout written by operator<<:
//   tuple_length
//   skip pattern, one char per position ('x' = counted, '-' = skipped)
//   totals[0] totals[1] totals[2] totals[3]
//   table_length rows of: freqs0 freqs1 freqs2 freqsI
// Allocates skips and the four frequency arrays with new[].
// Any pattern character other than 'x' or '-' is reported and treated as a
// skipped position.
// Returns the stream that was read from.
istream& TupleTable::parse(istream &in) {
  string buf;
  assert (in.good());
  int i=0;
  in >> tuple_length;
  in >> buf; // the pattern of skips
  assert ((int)buf.size() >= (int)tuple_length);
  skips = new int[tuple_length];
  true_length=tuple_length;
  for (i=0; i<tuple_length; ++i) {
    if (buf[i]=='x') {
      skips[i]=0;
      continue;
    }
    if (buf[i]!='-') {
      cout << "Warning, bad skips pattern; position skipped" << endl;
    }
    // The original wrote buf[i]=1 for a bad character, which left skips[i]
    // uninitialised; mark the position as skipped instead.
    skips[i]=1;
    true_length--;
  }
  assert(true_length > 2 && true_length < 12);
  table_length = power(4,true_length);

  in >> totals[0];
  in >> totals[1];
  in >> totals[2];
  in >> totals[3];

  freqs0 = new int[table_length];
  freqs1 = new int[table_length];
  freqs2 = new int[table_length];
  freqsI = new int[table_length];

  for (i=0; i<table_length; ++i) {
    // Check the stream actually being read (the original checked std::cin).
    assert(in.good());
    in >> freqs0[i];
    in >> freqs1[i];
    in >> freqs2[i];
    in >> freqsI[i];
  }
  return in;
}

// Stream extraction for TupleTable; forwards to TupleTable::parse and
// returns whatever stream reference parse returns.
istream& operator>>(istream &cin, TupleTable &tt) {
  istream &result = tt.parse(cin);
  return result;
}

// Writes the table in the layout parse() reads: tuple length, skip pattern
// ('-' for skipped, 'x' for counted positions), the four totals, then one
// row of four counts per table entry.
// Side effect: tt.true_length is recomputed as the number of 'x' positions.
ostream& operator<<(ostream &out, TupleTable &tt) {
  out << tt.tuple_length << endl;

  tt.true_length=0;
  for (int pos=0; pos<tt.tuple_length; ++pos) {
    if (!tt.skips[pos]) {
      tt.true_length++;
      out << 'x';
    } else {
      out << '-';
    }
  }
  out << endl;

  for (int t=0; t<4; ++t) {
    out << tt.totals[t] << " ";
  }
  out << endl;

  for (int row=0; row<tt.table_length; ++row) {
    out << tt.freqs0[row] << " " << tt.freqs1[row] << " ";
    out << tt.freqs2[row] << " " << tt.freqsI[row] << endl;
  }
  return out;
}

// Adds the tuples of seq to the table: for every start position i (from 1)
// whose tuple fits in the sequence, the count and total of the frame recorded
// in marks[i] are incremented. Tuples with unknown bases are ignored.
// Always returns 1.
// NOTE(review): the mark_sequence call is commented out, so every mark is
// GARB; unless GARB equals one of FR0/FR1/FR2/INTR nothing is counted.
int TupleTable::plusSequence(Sequence& seq) {
  const Sequence::size_type n = seq.size();
  int *marks = new int[n+1];
  for (Sequence::size_type k=0; k<=n; ++k) marks[k]=GARB;

  //mark_sequence(seq,tuple_length,marks);

  // Written as i+tuple_length <= n+1 rather than i <= n-tuple_length+1 so an
  // unsigned size_type cannot wrap around when the sequence is shorter than
  // the tuple.
  for (Sequence::size_type i=1; i+tuple_length <= n+1; ++i) {
    const int index = tuple_to_index(i,tuple_length,seq,skips);
    if (index==-1) continue;
    assert(index<table_length);

    switch(marks[i]) {
    case FR0:  freqs0[index]++; totals[0]++; break;
    case FR1:  freqs1[index]++; totals[1]++; break;
    case FR2:  freqs2[index]++; totals[2]++; break;
    case INTR: freqsI[index]++; totals[3]++; break;
    default:   break;
    }
  }

  // The scratch array was leaked in the original.
  delete[] marks;
  return 1;
}

// "Subtracts" the sequence seq from this table: for every start position i
// (from 1) whose tuple fits in the sequence, the count and total of the frame
// recorded in marks[i] are decremented.
// Returns 1 if everything is OK. If a count that is already 0 would have to
// be decremented, a warning is printed, the count is left alone and 0 is
// returned.
// NOTE(review): the mark_sequence call is commented out, so every mark is
// GARB; unless GARB equals one of FR0/FR1/FR2/INTR nothing is changed.
int TupleTable::minusSequence(Sequence& seq) {
  const Sequence::size_type n = seq.size();
  int retval=1;
  int *marks = new int[n+1];
  for (Sequence::size_type k=0; k<=n; ++k) marks[k]=GARB;

  // mark_sequence(seq,tuple_length,marks);

  // Written as i+tuple_length <= n+1 rather than i <= n-tuple_length+1 so an
  // unsigned size_type cannot wrap around when the sequence is shorter than
  // the tuple.
  for (Sequence::size_type i=1; i+tuple_length <= n+1; ++i) {
    const int index = tuple_to_index(i,tuple_length, seq,skips);
    if (index==-1) continue;
    assert(index<table_length);
    assert(freqs0 && freqs1 && freqs2 && freqsI);

    // Select the count array and the totals[] slot for this position's mark.
    int* freqs=NULL;
    int slot=-1;
    switch(marks[i]) {
    case FR0:  freqs=freqs0; slot=0; break;
    case FR1:  freqs=freqs1; slot=1; break;
    case FR2:  freqs=freqs2; slot=2; break;
    case INTR: freqs=freqsI; slot=3; break;
    default: break;
    }
    if (!freqs) continue;

    if (freqs[index]==0) {
      cout << "SKIPS: "; print_skips(); cout << endl;
      char buf[100];
      index_to_tuple(index,tuple_length,skips,buf);
      cout << "Pattern: " << buf << endl;
      cout << "Warning: in minusSequence, unused sequence passed." << endl;
      retval=0;
    } else {
      freqs[index]--;
      totals[slot]--;
    }
  }

  // The scratch array was leaked in the original.
  delete[] marks;
  return retval;
}

double TupleTable::relativeFrequency(Sequence& seq, int pos, int frame) {
  // Relative frequency of the frame's tuple against the three exon frames,
  // ignoring introns (intron weight 0) and with penalty 0. Intended for
  // positions that are not in an exon region counted in the table.
  const double exonWeight = 1;
  const double intronWeight = 0;
  const double penalty = 0;
  return calcTupleRelFreq(seq,pos,frame,exonWeight,intronWeight,penalty);
}
double TupleTable::relativeFrequencyMinusOne(Sequence& seq, int pos, int frame) {
  // Same as relativeFrequency but with penalty 1. Intended for positions in
  // an exon region that is counted in the table.
  const double exonWeight = 1;
  const double intronWeight = 0;
  const double penalty = 1;
  return calcTupleRelFreq(seq,pos,frame,exonWeight,intronWeight,penalty);
}
double TupleTable::relativeFrequencyEverywhere(Sequence& seq, int pos, int frame) {
  // Intron frequencies are taken into account here (intron weight 1), which
  // penalizes tuples that are very frequent in introns. Penalty 0.
  const double exonWeight = 1;
  const double intronWeight = 1;
  const double penalty = 0;
  return calcTupleRelFreq(seq,pos,frame,exonWeight,intronWeight,penalty);
}
double TupleTable::relativeFrequencyEverywhereMinusOne(Sequence& seq, int pos, int frame) {
  // Same as relativeFrequencyEverywhere but with penalty 1. Intended only for
  // exon regions that are counted in the table.
  const double exonWeight = 1;
  const double intronWeight = 1;
  const double penalty = 1;
  return calcTupleRelFreq(seq,pos,frame,exonWeight,intronWeight,penalty);
}

// Log-ratio of the tuple's frequency in `frame` to its intron frequency at
// seq[pos]; each frequency is floored at SMALL_VALUE before taking LOG.
double TupleTable::logFreq(Sequence& seq, int pos, int frame) {
  const double frameTerm  = LOG(MAX(SMALL_VALUE,tupleProb(frame,pos,seq)));
  const double intronTerm = LOG(MAX(SMALL_VALUE,tupleProb(INTR,pos,seq)));
  return frameTerm - intronTerm;
}

// Computes the relative frequency of the tuple starting at seq[pos]; see the
// index-based overload for the formula.
// Returns 0 when the tuple contains an unknown base (tuple_to_index reports
// this as a negative index), matching what the tupleProb overloads return in
// that case. The original passed the negative index on, where it was used as
// an array subscript.
double TupleTable::calcTupleRelFreq(Sequence& seq, int pos, int frame,
                                    double exonWeight, double intronWeight, double penalty) {
  const int index = tuple_to_index(pos,tuple_length,seq,skips);
  if (index < 0) {
    return 0;
  }
  return calcTupleRelFreq(index,frame,exonWeight,intronWeight,penalty);
}

double TupleTable::calcTupleRelFreq(int tupleIndex, int frame,
				    double exonWeight, double intronWeight, double penalty) {
  // Requires: penalty in(FR0,FR1,FR2)
  // Effects: Calculates the relative frequencies of the tuple given that tupleIndex was already added to the;
  //          database.;
  //
  //                        f_i(t)-intronWeight*(f_I(t)+penalty);
  // -------------------------------------------------------------------------;
  //                     exonWeight*(f_0(t)+f_1(t)+f_2(t)+2*penalty);
  //
  //  This is a modified version as of Sep 1;

  double f_i,freqI = ((double)freqsI[tupleIndex]) / ((double)totals[3]);
  
  switch(frame) {
  case FR0:
    f_i  = freqs0[tupleIndex];
    f_i /= (double)totals[0];
    break;
  case FR1:
    f_i  = freqs1[tupleIndex];
    f_i /= (double)totals[1];
    break;
  case FR2:
    f_i  = freqs2[tupleIndex];
    f_i /= (double)totals[2];
    break;
  default: assert(1==0); // (penalty !in(FR0,FR1,FR2))
  }
  


  
  if (penalty==0)
      return MAX(SMALL_VALUE,
        ( f_i - intronWeight*freqI ) / 
      (exonWeight*(((double)freqs0[tupleIndex])/((double)totals[0]) + 
  ((double)freqs1[tupleIndex])/((double)totals[1]) + 
  ((double)freqs2[tupleIndex])/((double)totals[2]) + 
    		    0)));



  else if (penalty==1)
  return 
    ( f_i - intronWeight*freqI ) / 
    (exonWeight*(((double)freqs0[tupleIndex])/((double)totals[0]) + 
		 ((double)freqs1[tupleIndex])/((double)totals[1]) + 
		 ((double)freqs2[tupleIndex])/((double)totals[2]) + 
		 0));
  else
    return 0;

}

  
// True for the four lowercase nucleotide letters accepted in tuple text.
static bool isNucleotideChar(char c)
{
  return c == 'a' || c == 'c' || c == 'g' || c == 't';
}

// Reads whitespace-separated tokens from `in` until extraction fails. Tokens
// that do not start with a nucleotide letter are ignored. For each remaining
// token the first begin_length+end_length nucleotide letters (other
// characters inside the token are dropped) are split into a beginning and an
// ending tuple, and ltt.add_entry(beginIndex, endIndex) is called once.
// Tokens holding fewer letters than that are reported and skipped.
//
// Changes from the original:
//  - extraction is limited to the size of buf (the original could overrun it);
//  - the scan stops at the end of the token (the original never terminated
//    on a token that was too short);
//  - letters are stored compacted, so a dropped character no longer leaves an
//    unset slot in bufint.
istream& operator>>(istream &in, LongTupleTable &ltt)
{
  char buf[100];
  int  bufint[100];
  int  skips[100];
  arrayZero(skips,100);

  const int l1 = ltt.begin_length;
  const int l2 = ltt.end_length;
  const int wanted = l1 + l2;

  for (;;)
    {
      // width() bounds the next char-array extraction to sizeof(buf)-1
      // characters plus the terminator; it is reset after every extraction.
      in.width(sizeof(buf));
      if (!(in >> buf)) break;

      if (!isNucleotideChar(buf[0]))
        {
          continue;
        }

      int filled = 0;
      for (int i = 0; buf[i] != '\0' && filled < wanted; ++i)
        {
          if (!isNucleotideChar(buf[i])) continue;
          bufint[filled++] = (int)char2nucl(buf[i]);
        }
      if (filled < wanted)
        {
          cout << "Warning, tuple too short; entry skipped: " << buf << endl;
          continue;
        }

      int idx1 = tup_to_index(l1, bufint,      skips);
      assert(idx1>=0);
      int idx2 = tup_to_index(l2, bufint + l1, skips);
      if (idx2 < 0) {
        cout << l1 << " " << l2 << " " << buf << endl;
      }
      assert(idx2>=0);
      ltt.add_entry(idx1,idx2);
    }
  return in;
}

// Writes every stored (beginning, ending) pair of ltt to `out`, one per line,
// as "<beginning letters><ending letters> <freq>". Beginnings are visited in
// index order; for each, its chain of TupleEnding nodes is followed through
// `next`.
// The original sent the entries to cout regardless of the stream passed in.
ostream& operator<<(ostream &out, LongTupleTable &ltt)
{
  char buf1[100];
  char buf2[100];
  int  skips[100];
  arrayZero(skips,100);
  const int l1 = ltt.begin_length;
  const int l2 = ltt.end_length;
  const int N = ltt.arraylength;

  for(int i = 0; i < N; ++i)
    {
      if (!ltt.tupleTable[i]) continue;
      index_to_tuple(i,l1,skips,buf1);
      for (TupleEnding *te = ltt.tupleTable[i]; te; te = te->next) {
        index_to_tuple(te->ending,l2,skips,buf2);
        out << buf1 << buf2 << " " << te->freq << endl;
      }
    }
  return out;
}


// End of file





















