#include <stdio.h>
#include <cstdlib>
#include <list>
#include <vector>
#include <fstream>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <math.h>
#include <string>
#include "PGOneII_PerProtein.h"
#include "PGOneII_PD.h"

using namespace std;



// Forward declaration of the recursive pattern-growth routine defined below
// main().
//
//   TempProData        projected database holding the current prefix pattern
//   SequencesDatabase  all input sequences
//   AASubType          alphabet of symbols tried as the next pattern item
//   Min_Pat_Length     minimal prefix size for a pattern to be written out
//   MinSupRatio        minimal support, as a fraction of SequencesDatabase.size()
//   Max_Consective_WC  largest wildcard count tried between two pattern items
//   Max_Ambigu         largest (upper - lower) spread of a wildcard range
//   outfSeq            stream that receives the reported patterns
void mining(ProjectedDatabase & TempProData,
            const vector <string>& SequencesDatabase,
            const string & AASubType,
            const int & Min_Pat_Length,
            const double & MinSupRatio,
            const int & Max_Consective_WC,
            const int & Max_Ambigu,
            ofstream & outfSeq);

// Program entry point.
//
// Expects exactly six command-line arguments (see the usage text below):
//   argv[1]  minimal number of items a reported pattern must contain
//   argv[2]  minimal support ratio
//   argv[3]  maximal number of consecutive wildcards
//   argv[4]  maximal ambiguity (upper - lower) of a wildcard range
//   argv[5]  input file with the sequences
//   argv[6]  output file for the mined patterns
//
// Returns 0 after printing the usage text or after a completed run, and
// EXIT_FAILURE when the input or the output file cannot be opened.
int main(int argc, char *argv[])
{
  if (argc != 7) {
    cout << "Welcome to PGOneII (mining type II patterns by pattern growth from one dataset).\n"
         << "6 arguments are required:\n"
         << "1. The first argument should be the minimal number \n"
         << "   of frequent items in one pattern should be reported; (3)\n"
         << "2. The second argument should be the minimal proportion\n"
         << "   of proteins have the pattern; (0.8)\n"
         << "3. The third argument should be the maximal number of consecutive\n"
         << "   wildcards; (3)\n"
         << "4. The fourth argument should be the maximal ambiguousness in the \n"
         << "   number of wildcards; (1)   For example, Ax(2,4)T is 2 (4-2=2).\n"
         << "5. The fifth argument should be the pathway and filename\n"
         << "   of protein sequences in fasta format;\n"
         << "6. The sixth argument should be the pathway and filename\n"
         << "   of output file." << endl;

    return 0;
  }

  // NOTE(review): atoi/atof silently yield 0 for non-numeric text; the
  // arguments are not validated here.
  const int Min_Pat_Length = atoi(argv[1]);
  const double Min_Sup_ratio = atof(argv[2]);
  const int Max_Consective_WC = atoi(argv[3]);
  const int Max_Ambigu = atoi(argv[4]);

  ifstream inf_Seq(argv[5]);
  if (!inf_Seq) {
    cout << "Sorry, cannot find the file: " << argv[5] << endl;
    // Without readable input there is nothing to mine.
    return EXIT_FAILURE;
  }

  // Open the output path directly from argv: copying it into a fixed-size
  // char buffer would overflow for long paths.
  ofstream outfSeq(argv[6]);
  if (!outfSeq) {
    cout << "Sorry, cannot write to the file: " << argv[6] << endl;
    // Mining without a writable output stream would discard every result.
    return EXIT_FAILURE;
  }

  const string AASubType = "GASTCVLIMPFYWDENQHKR";

  // ######################### input sequences database ########################
  // The file is consumed as alternating whitespace-delimited tokens: a name
  // followed by its sequence. Every stored sequence gets a trailing '#'.
  vector < string > SequencesDatabase;
  string TempName, TempStr;
  while ( inf_Seq >> TempName >> TempStr )
  {
    if (!TempStr.empty())
      SequencesDatabase.push_back( TempStr + "#");
  }
  cout << "Size of the database: " << SequencesDatabase.size() << endl;
  // ##################### end of input sequences database #####################

  // ##################### prefixspan ##########################################
  ProjectedDatabase EmptyProData;

  // Start one mining run per alphabet symbol, each from a fresh copy of the
  // empty projected database.
  for ( string::size_type AAIndex = 0; AAIndex < AASubType.size(); AAIndex++ )
  {
    ProjectedDatabase ProData(EmptyProData);

    ProData.InitiateProData(AASubType[AAIndex], SequencesDatabase);
    cout << "Mining projected database for Prefix " << AASubType[AAIndex] << endl;

    mining(ProData,
           SequencesDatabase,
           AASubType,
           Min_Pat_Length,
           Min_Sup_ratio,
           Max_Consective_WC,
           Max_Ambigu,
           outfSeq);
  }
  cout << "End of this run!" << endl;
  return 0;
}

// Recursively grows the pattern held in TempProData.
//
// The call returns immediately when the support of TempProData is below
// SequencesDatabase.size() * MinSupRatio. Otherwise, if the prefix holds at
// least Min_Pat_Length items, the pattern is written to outfSeq as one line:
// the prefix items separated by their wildcard gaps, a tab, then the support.
// A gap with equal bounds n is written as n times 'x'; a gap with bounds
// lower < upper is written as "x(lower,upper)".
//
// Afterwards every symbol of AASubType is tried as the next item, combined
// with every wildcard range [lower, lower + ambiguity] that satisfies
// ambiguity <= Max_Ambigu and lower + ambiguity <= Max_Consective_WC, and the
// routine recurses on each extended projected database.
//
//   TempProData        projected database of the current prefix
//   SequencesDatabase  all input sequences
//   AASubType          alphabet of candidate next items
//   Min_Pat_Length     minimal prefix size for a pattern to be reported
//   MinSupRatio        minimal support as a fraction of the database size
//   Max_Consective_WC  upper limit for the wildcard count of a gap
//   Max_Ambigu         upper limit for (upper - lower) of a gap
//   outfSeq            output stream for reported patterns
void mining( ProjectedDatabase & TempProData,
             const vector <string>& SequencesDatabase,
             const string & AASubType,
             const int & Min_Pat_Length,
             const double & MinSupRatio,
             const int & Max_Consective_WC,
             const int & Max_Ambigu,
             ofstream & outfSeq) {

  // Prune: an infrequent prefix is neither reported nor extended.
  if ( TempProData.GetSupport() < SequencesDatabase.size() * MinSupRatio ) return;

  if (TempProData.GetPrefixSize() >= Min_Pat_Length) {
    // Bind by const reference so no copy is made when the getters return
    // references; a by-value return is kept alive by lifetime extension.
    const vector <char>& OutPrefix = TempProData.GetPrefix();
    const vector <int>&  OutWildcardNumber = TempProData.GetWildcardNumber();
    const int PrefixLength = (int)(OutPrefix.size());

    for (int PrefixIndex = 0; PrefixIndex < PrefixLength; PrefixIndex++) {
      outfSeq << OutPrefix[PrefixIndex];

      // No gap follows the last item.
      if (PrefixIndex == PrefixLength - 1) continue;

      // The gap after item i is stored as the pair (2*i, 2*i + 1).
      const int LowerWC = OutWildcardNumber[PrefixIndex*2];
      const int UpperWC = OutWildcardNumber[PrefixIndex*2+1];

      if (LowerWC < UpperWC) {
        outfSeq << "x(" << LowerWC << "," << UpperWC << ")";
      }
      else if (LowerWC == UpperWC) {
        for (int WildcardOutIndex = 0; WildcardOutIndex < LowerWC; WildcardOutIndex++)
          outfSeq << "x";
      }
      else {
        // Reached only when the lower bound exceeds the upper bound.
        cout << "The first wildcard number is bigger than the second one !!" << endl;
      }
    }
    outfSeq << "\t" << TempProData.GetSupport() << endl;
  }

  // Try every alphabet symbol with every admissible wildcard range.
  for (string::size_type AAIndex = 0; AAIndex < AASubType.size(); AAIndex++ ) {
    for (int AmbiguIndex = 0; AmbiguIndex <= Max_Ambigu; AmbiguIndex++) {
      // The loop condition keeps the upper bound within Max_Consective_WC.
      for (int LowerBound = 0;
               LowerBound + AmbiguIndex <= Max_Consective_WC;
               LowerBound++) {
        ProjectedDatabase ProData;
        ProData.UpdateProData(TempProData,
                              AASubType[AAIndex],
                              SequencesDatabase,
                              LowerBound,
                              LowerBound + AmbiguIndex);
        mining(ProData,
               SequencesDatabase,
               AASubType,
               Min_Pat_Length,
               MinSupRatio,
               Max_Consective_WC,
               Max_Ambigu,
               outfSeq);
      }
    }
  }

}






