#include <stdio.h>
#include <cstdlib>
#include <list>
#include <vector>
#include <fstream>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <math.h>
#include <string>
#include "PGOneIII_PerProtein.h"
#include "PGOneIII_PD.h"

using namespace std;



// Forward declaration of the recursive pattern-growth routine defined after main().
// It prunes TempProData when its support is below SequencesDatabase.size() * MinSupRatio,
// writes prefixes of at least Min_Pat_Length items to outfSeq, and recurses on
// copies of TempProData extended by the characters of AASubType.
void mining(ProjectedDatabase & TempProData, const vector <string>& SequencesDatabase, const string & AASubType, const double & MinSupRatio, const int & Min_Pat_Length, ofstream & outfSeq);

/**
 * Command-line driver.
 *
 * Expected arguments (argc must be 6):
 *   argv[1]  minimal pattern length to report (parsed with atoi)
 *   argv[2]  minimal support ratio (parsed with atof)
 *   argv[3]  window length handed to ProjectedDatabase::InitiateProData
 *   argv[4]  input sequence file
 *   argv[5]  output file for the reported patterns
 *
 * Returns 0 after printing the usage text or after a completed run, and 1 when
 * the input file cannot be opened, the output file cannot be created, or no
 * sequence could be read.
 */
int main(int argc, char *argv[])
{
  if (argc != 6) {
    cout << "Welcome to PGOneIII (mining type III patterns by pattern growth from one dataset.\n\n"
         << "5 arguments are required:\n"
         << "1. The first argument should be the minimal number \n"
         << "   of frequent items in one pattern should be reported; (3)\n"
         << "2. The second argument should be the minimal proportion\n"
         << "   of proteins have the pattern; (0.8)\n"
         << "3. The third argument should be the length of window to search(20)\n"
         << "4. The forth argument should be the pathway and filename\n"
         << "   of protein sequences in fasta format;\n"
         << "5. The fifth argument should be the pathway and filename\n"
         << "   of output file." << endl;

    return 0;
  }

  // NOTE(review): atoi/atof report no errors; a malformed number silently becomes 0.
  int Min_Pat_Length = atoi(argv[1]);
  double Min_Sup_ratio = atof(argv[2]);
  int Window = atoi(argv[3]);

  ifstream inf_Seq(argv[4]);
  if (!inf_Seq) {
    cout << "Sorry, cannot find the file: " << argv[4] << endl;
    return 1;  // nothing to mine without an input file
  }

  // Open the output directly from argv[5]; copying the path into a fixed-size
  // char buffer would overflow for long paths.
  ofstream outfSeq(argv[5]);
  if (!outfSeq) {
    cout << "Sorry, cannot write to the file: " << argv[5] << endl;
    return 1;  // results could not be stored anywhere
  }

  const string AASubType = "GASTCVLIMPFYWDENQHKR";

  // ######################### input sequences database ########################
  // The file is consumed as whitespace-separated token pairs: the first token
  // of each pair is discarded and the second is stored as a sequence with a
  // trailing '#' appended (presumably an end-of-sequence marker for
  // ProjectedDatabase -- confirm against PGOneIII_PD.h).
  vector < string > SequencesDatabase;
  string TempName, TempStr;
  while ( inf_Seq >> TempName >> TempStr )
  {
    if (!TempStr.empty())
      SequencesDatabase.push_back( TempStr + "#");
  }
  cout << "Size of the database: " << SequencesDatabase.size() << endl;

  // With no sequences the support threshold used by mining()
  // (database size * ratio) is zero, so its pruning test presumably never
  // fires; stop here instead of starting a search that may never terminate.
  if (SequencesDatabase.empty()) {
    cout << "Sorry, no sequences could be read from: " << argv[4] << endl;
    return 1;
  }
  // ##################### end of input sequences database #####################

  // ##################### prefixspan ##########################################
  ProjectedDatabase EmptyProData;

  // One search per character of the alphabet, each starting from a fresh copy
  // of the empty projected database.
  for ( size_t AAIndex = 0; AAIndex < AASubType.size(); AAIndex++ )
  {
    ProjectedDatabase ProData(EmptyProData);
    ProData.InitiateProData(AASubType[AAIndex], SequencesDatabase, Window);
    cout << "Mining projected database for Prefix " << AASubType[AAIndex] << endl;
    mining(ProData, SequencesDatabase, AASubType, Min_Sup_ratio, Min_Pat_Length, outfSeq);
  }
  cout << "End of this run!" << endl;
  return 0;
}

/**
 * Recursive pattern growth over a projected database.
 *
 * @param TempProData        projected database for the current prefix; it is
 *                           only queried and copied here.
 * @param SequencesDatabase  all input sequences; its size scales the support
 *                           threshold and it is forwarded to UpdateProData.
 * @param AASubType          alphabet; every character is tried as the next item.
 * @param MinSupRatio        the branch is abandoned when the support is below
 *                           SequencesDatabase.size() * MinSupRatio.
 * @param Min_Pat_Length     prefixes with at least this many items are written.
 * @param outfSeq            receives one line per reported prefix:
 *                           items joined by "->", a tab, then the support.
 */
void mining( ProjectedDatabase & TempProData, const vector <string>& SequencesDatabase, const string & AASubType, const double & MinSupRatio, const int & Min_Pat_Length, ofstream & outfSeq) {

  // Prune: this branch is not extended once its support is below the threshold.
  if ( TempProData.GetSupport() < SequencesDatabase.size() * MinSupRatio ) return;

  if (TempProData.GetPrefixSize() >= Min_Pat_Length) {
    const vector <char> OutPrefix = TempProData.GetPrefix();
    for (size_t PrefixIndex = 0; PrefixIndex < OutPrefix.size(); PrefixIndex++) {
      // Separator goes between items only, never after the last one.
      if (PrefixIndex != 0)
        outfSeq << "->";
      outfSeq << OutPrefix[PrefixIndex];
    }
    outfSeq << "\t" << TempProData.GetSupport() << endl;
  }

  // Try every alphabet character as the next item. The bound comes from the
  // alphabet itself so a shorter or longer AASubType is handled correctly.
  for (size_t AAIndex = 0; AAIndex < AASubType.size(); AAIndex++ ) {
    ProjectedDatabase ProData(TempProData);  // work on a copy so sibling branches stay independent
    ProData.UpdateProData(AASubType[AAIndex], SequencesDatabase);
    mining(ProData, SequencesDatabase, AASubType, MinSupRatio, Min_Pat_Length, outfSeq);
  }

}






