#include <stdio.h>
#include <cstdlib>
#include <list>
#include <vector>
#include <fstream>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <math.h>
#include <string>
#include "PGTwoIII_PerProtein.h"
#include "PGTwoIII_PD.h"

using namespace std;



// Forward declaration of the recursive mining routine defined after main().
//
// PositiveDataset / NegativeDataset : projected databases for the current
//                                     prefix in each dataset.
// SeqPositive / SeqNegative         : the loaded sequences; their sizes are
//                                     the denominators of the support values.
// AASubType                         : characters tried as the next item when
//                                     extending the prefix.
// Min_Support_Diff                  : branches whose (positive - negative)
//                                     support falls below this are abandoned.
// Min_Pat_Length                    : minimal prefix size for a pattern to be
//                                     written to outfSeq.
// Min_Evaluate                      : minimal prefix size before the support
//                                     difference test is applied.
// outfSeq                           : stream receiving the reported patterns.
void mining(ProjectedDatabase & PositiveDataset,
            const vector <string>& SeqPositive,
            ProjectedDatabase & NegativeDataset,
            const vector <string>& SeqNegative,
            const string & AASubType,
            const double & Min_Support_Diff,
            const int & Min_Pat_Length,
            const int & Min_Evaluate,
            ofstream & outfSeq);

int main(int argc, char *argv[])
{
  if (argc != 8) {
    cout << "Welcome to PGOneIII (mining type III patterns by pattern growth from one dataset.\n\n"
         << "7 arguments are required:\n"
         << "1. The first argument should be the minimal number \n"
         << "   of frequent items in one pattern to be reported; (3)\n"
         << "2. The second argument should be the minimal support difference; (0.8)\n"
         << "3. The third argument should be the length of window to search; (10)\n"
         << "4. The fourth argument should be the minimal number of frequent\n"
         << "   items in the patterns before evaluating support difference; (3)\n"
         << "5. The forth argument should be the pathway and filename\n"
         << "   of the positive dataset in fasta format;\n"
         << "6. The fifth argument should be the pathway and filename\n"
         << "   of the negative dataset in fasta format;\n"
         << "7. The sixth argument should be the pathway and filename\n"
         << "   of output file." << endl;

    return 0;
  }

    int Min_Pat_Length = atoi(argv[1]);
    double Min_Support_Diff = atof(argv[2]);
    int Window = atoi(argv[3]);
    int Min_Evaluate = atoi(argv[4]);
    
    ifstream  inf_Seq_Positive(argv[5]);
    if (!inf_Seq_Positive)
      cout << "Sorry, cannot find the file: " << argv[5] << endl;

    ifstream  inf_Seq_Negative(argv[6]);
    if (!inf_Seq_Negative)
      cout << "Sorry, cannot find the file: " << argv[6] << endl;

    char OutputFilename[50];
    strcpy(OutputFilename, argv[7]);
    ofstream outfSeq(OutputFilename);
    if (!outfSeq)
      cout << "Sorry, cannot write to the file: " << argv[7] << endl;

  const string AASubType = "GASTCVLIMPFYWDENQHKR";
  
  // ######################### input sequences database ########################
  vector < string > SeqPositive;
  vector < string > SeqNegative;
  string TempName, TempStr;
  while ( inf_Seq_Positive >> TempName >> TempStr ) {
     if (!TempStr.empty())
          SeqPositive.push_back( TempStr + "#");
  };
  while ( inf_Seq_Negative >> TempName >> TempStr ) {
     if (!TempStr.empty())
          SeqNegative.push_back( TempStr + "#");
  };

  cout << "Size of the positive database: " << SeqPositive.size() << endl;
  cout << "Size of the negative database: " << SeqNegative.size() << endl;

  // ##################### end of input sequences database #####################

  // ##################### prefixspan ##########################################
  ProjectedDatabase EmptyProData;

  for ( int AAIndex = 0; AAIndex < 20; AAIndex++ ){
        ProjectedDatabase ProDataPositive(EmptyProData);
        ProjectedDatabase ProDataNegative(EmptyProData);
        //cout << "Initiate projected database for " << AASubType[AAIndex] << endl;
        ProDataPositive.InitiateProData(AASubType[AAIndex], SeqPositive, Window);
        ProDataNegative.InitiateProData(AASubType[AAIndex], SeqNegative, Window);
        cout << "Mining projected database for Prefix " << AASubType[AAIndex] << endl;
        mining(ProDataPositive,
               SeqPositive,
               ProDataNegative,
               SeqNegative,
               AASubType,
               Min_Support_Diff,
               Min_Pat_Length,
               Min_Evaluate,
               outfSeq);
  }

  cout << "End of this run!" << endl;
  return 0;
}

// Recursively grows the current prefix pattern and reports it when it
// discriminates the positive dataset from the negative one.
//
// For the prefix held by PositiveDataset / NegativeDataset:
//   1. Support in each dataset is GetSupport() divided by the number of
//      sequences in that dataset (0 when the dataset is empty).
//   2. Once the prefix has at least Min_Evaluate items, the branch is
//      abandoned if (positive - negative) support is below Min_Support_Diff.
//      This is the only place where the recursion stops.
//   3. If the prefix has at least Min_Pat_Length items it is written to
//      outfSeq as "a->b->c<TAB>pos - neg = diff".
//   4. The prefix is extended by every character of AASubType in turn, on
//      copies of both projected databases, and each extension is mined.
//
// PositiveDataset and NegativeDataset are only copied and queried here; the
// caller's objects are not passed to UpdateProData.
void mining(ProjectedDatabase & PositiveDataset,
            const vector <string>& SeqPositive,
            ProjectedDatabase & NegativeDataset,
            const vector <string>& SeqNegative,
            const string & AASubType,
            const double & Min_Support_Diff,
            const int & Min_Pat_Length,
            const int & Min_Evaluate,
            ofstream & outfSeq) {
  // Guard the divisions: with an empty dataset the quotient would be NaN,
  // the prune test below would then never be true, and the recursion would
  // not stop.
  const double SupportPositive = SeqPositive.empty()
      ? 0.0
      : static_cast<double>(PositiveDataset.GetSupport()) / SeqPositive.size();
  const double SupportNegative = SeqNegative.empty()
      ? 0.0
      : static_cast<double>(NegativeDataset.GetSupport()) / SeqNegative.size();
  const double SupportDiff = SupportPositive - SupportNegative;

  // Prune: long enough to be evaluated but not discriminative enough.
  if (PositiveDataset.GetPrefixSize() >= Min_Evaluate
      && SupportDiff < Min_Support_Diff)
    return;

  if (PositiveDataset.GetPrefixSize() >= Min_Pat_Length) {
    const vector <char>& OutPrefix = PositiveDataset.GetPrefix();
    for (vector <char>::size_type PrefixIndex = 0;
         PrefixIndex < OutPrefix.size(); ++PrefixIndex) {
      if (PrefixIndex != 0)
        outfSeq << "->";
      outfSeq << OutPrefix[PrefixIndex];
    }
    // endl is kept so every reported pattern reaches the file immediately.
    outfSeq << "\t" << SupportPositive << " - " << SupportNegative
            << " = " << SupportDiff << endl;
  }

  // Try every item of the alphabet as the next element of the pattern; the
  // bound follows AASubType rather than a hard-coded 20.
  for (string::size_type AAIndex = 0; AAIndex < AASubType.size(); ++AAIndex) {
    ProjectedDatabase TempProDataPositive(PositiveDataset);
    ProjectedDatabase TempProDataNegative(NegativeDataset);
    TempProDataPositive.UpdateProData(AASubType[AAIndex], SeqPositive);
    TempProDataNegative.UpdateProData(AASubType[AAIndex], SeqNegative);
    mining(TempProDataPositive,
           SeqPositive,
           TempProDataNegative,
           SeqNegative,
           AASubType,
           Min_Support_Diff,
           Min_Pat_Length,
           Min_Evaluate,
           outfSeq);
  }
}






