#include <stdio.h>
#include <cstdlib>
#include <list>
#include <vector>
#include <fstream>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <math.h>
#include <string>
#include "PGTwoII_PerProtein.h"
#include "PGTwoII_PD.h"

using namespace std;

// Forward declaration of the recursive pattern-growth routine; the definition
// follows main() in this file.
//
//   ProDataPositive / ProDataNegative : projected databases holding the current
//                                       prefix for the positive / negative data
//   SeqPositive / SeqNegative         : the full sequence sets the projected
//                                       databases were built from
//   AASubType                         : alphabet of items used to extend a prefix
//   Min_Pat_Length                    : minimal prefix size for a pattern to be
//                                       written to outfSeq
//   Min_Sup_Diff                      : minimal (positive - negative) support
//                                       difference a prefix must keep
//   Max_Consective_WC                 : maximal number of wildcards allowed
//                                       between two items
//   Max_Ambigu                        : maximal (upper - lower) spread of a
//                                       wildcard range
//   Min_Evaluate                      : prefix size from which the support
//                                       difference test is applied
//   outfSeq                           : stream the reported patterns are written to
void mining( ProjectedDatabase & ProDataPositive,
             const vector <string>& SeqPositive,
             ProjectedDatabase & ProDataNegative,
             const vector <string>& SeqNegative,
             const string & AASubType,
             const int & Min_Pat_Length,
             const double & Min_Sup_Diff,
             const int & Max_Consective_WC,
             const int & Max_Ambigu,
             const int & Min_Evaluate,
             ofstream & outfSeq);

// Reads whitespace-separated (name, sequence) token pairs from `in` and returns
// the sequences, each with a '#' appended (presumably an end-of-sequence marker
// for ProjectedDatabase -- confirm against PGTwoII_PD.h).
//
// NOTE(review): because tokens are split on any whitespace, this only parses
// input whose header contains no blanks and whose sequence sits on one line.
static vector <string> LoadSequences(ifstream & in)
{
  vector <string> Sequences;
  string Name, Residues;
  // A successful operator>> never yields an empty token, so no emptiness check
  // is needed before storing the sequence.
  while (in >> Name >> Residues)
    Sequences.push_back(Residues + "#");
  return Sequences;
}

// Program entry point.
//
// Expects exactly 8 command-line arguments (see the usage text below), loads
// the positive and negative sequence sets, and runs mining() once for every
// single-item prefix taken from the alphabet.
//
// Returns 0 after a completed run or after printing the usage text, and
// EXIT_FAILURE when a file cannot be opened or the positive set is empty.
int main(int argc, char *argv[])
{
  if (argc != 9) {
    cout << "Welcome to PGOneII (mining type II patterns by pattern growth from one dataset).\n"
         << "8 arguments are required:\n"
         << "1. The first argument should be the minimal number \n"
         << "   of frequent items in one pattern should be reported; (3)\n"
         << "2. The second argument should be the minimal support difference. (0.8)\n"
         << "3. The third argument should be the maximal number of consective"
         << "   wildcards (3)\n"
         << "4. The fourth argument should be the maximal ambiguousness in the \n"
         << "   number of wildcards; (1)   For example, Ax(2,4)T is 2 (4-2=2).\n"
         << "5. The fifth argument should be the minimal number of non-wildcards \n"
         << "   in the patterns before evaluating support difference. (3)\n"
         << "6. The sixth argument should be the pathway and filename\n"
         << "   of positive dataset in fasta format;\n"
         << "7. The seventh argument should be the pathway and filename\n"
         << "   of negative dataset in fasta format;\n"
         << "8. The eighth argument should be the pathway and filename\n"
         << "   of output file." << endl;

    return 0;
  }

  // ######################### mining parameters ###############################
  const int    Min_Pat_Length    = atoi(argv[1]);
  const double Min_Sup_Diff      = atof(argv[2]);
  const int    Max_Consective_WC = atoi(argv[3]);
  const int    Max_Ambigu        = atoi(argv[4]);
  const int    Min_Evaluate      = atoi(argv[5]);

  // ######################### open files ######################################
  // Stop on the first file that cannot be opened: continuing with a dead
  // stream would silently mine an empty database or discard every result.
  ifstream inf_Seq_Positive(argv[6]);
  if (!inf_Seq_Positive) {
    cerr << "Sorry, cannot find the file: " << argv[6] << endl;
    return EXIT_FAILURE;
  }

  ifstream inf_Seq_Negative(argv[7]);
  if (!inf_Seq_Negative) {
    cerr << "Sorry, cannot find the file: " << argv[7] << endl;
    return EXIT_FAILURE;
  }

  // The path is handed to the stream directly; copying it into a fixed-size
  // char buffer would overflow for long paths.
  ofstream outfSeq(argv[8]);
  if (!outfSeq) {
    cerr << "Sorry, cannot write to the file: " << argv[8] << endl;
    return EXIT_FAILURE;
  }

  const string AASubType = "GASTCVLIMPFYWDENQHKR";

  // ######################### input sequences database ########################
  const vector <string> SeqPositive = LoadSequences(inf_Seq_Positive);
  const vector <string> SeqNegative = LoadSequences(inf_Seq_Negative);

  cout << "Size of the positive database: " << SeqPositive.size() << endl;
  cout << "Size of the negative database: " << SeqNegative.size() << endl;

  // Supports are computed relative to the database size, so an empty positive
  // set leaves nothing meaningful to mine.
  if (SeqPositive.empty()) {
    cerr << "Sorry, no sequences were read from the file: " << argv[6] << endl;
    return EXIT_FAILURE;
  }
  // ##################### end of input sequences database #####################

  // ##################### prefixspan ##########################################
  ProjectedDatabase EmptyProData;

  // One mining run per single-item prefix of the alphabet.
  for (string::size_type AAIndex = 0; AAIndex < AASubType.size(); AAIndex++)
    {
        ProjectedDatabase ProDataPositive(EmptyProData);
        ProjectedDatabase ProDataNegative(EmptyProData);
        ProDataPositive.InitiateProData(AASubType[AAIndex], SeqPositive);
        ProDataNegative.InitiateProData(AASubType[AAIndex], SeqNegative);
        cout << "Mining projected database for Prefix " << AASubType[AAIndex] << endl;

        mining(ProDataPositive,
               SeqPositive,
               ProDataNegative,
               SeqNegative,
               AASubType,
               Min_Pat_Length,
               Min_Sup_Diff,
               Max_Consective_WC,
               Max_Ambigu,
               Min_Evaluate,
               outfSeq);

    }
  cout << "End of this run!" << endl;
  return 0;
}

// Recursively grows the prefix stored in the two projected databases.
//
// For the current prefix the routine
//   1. computes the relative support in the positive and in the negative
//      database (support count divided by the size of the matching database);
//   2. stops this branch when the prefix has at least Min_Evaluate items and
//      the support difference (positive - negative) is below Min_Sup_Diff;
//   3. writes the pattern and its supports to outfSeq when the prefix has at
//      least Min_Pat_Length items;
//   4. extends the prefix by every item of AASubType combined with every
//      wildcard range [lower, lower + ambiguity] whose upper bound does not
//      exceed Max_Consective_WC, and recurses on each extension.
//
// An empty database contributes a support of 0 rather than 0/0: a NaN support
// would make the pruning comparison in step 2 always false.
void mining( ProjectedDatabase & ProDataPositive,
             const vector <string>& SeqPositive,
             ProjectedDatabase & ProDataNegative,
             const vector <string>& SeqNegative,
             const string & AASubType,
             const int & Min_Pat_Length,
             const double & Min_Sup_Diff,
             const int & Max_Consective_WC,
             const int & Max_Ambigu,
             const int & Min_Evaluate,
             ofstream & outfSeq) {

  // Each support is normalised by the size of its OWN database.
  const double SupportPositive = SeqPositive.empty()
      ? 0.0
      : static_cast<double>(ProDataPositive.GetSupport()) / SeqPositive.size();
  const double SupportNegative = SeqNegative.empty()
      ? 0.0
      : static_cast<double>(ProDataNegative.GetSupport()) / SeqNegative.size();

  // One signed value is used for both length tests so they compare alike.
  const int PrefixSize = static_cast<int>(ProDataPositive.GetPrefixSize());

  // Prune: the prefix is long enough to be judged and not discriminative enough.
  if (PrefixSize >= Min_Evaluate
      && (SupportPositive - SupportNegative < Min_Sup_Diff))
    return;

  if (PrefixSize >= Min_Pat_Length) {
    const vector <char> OutPrefix = ProDataPositive.GetPrefix();
    // Indexed below as two entries per gap: [2*i] and [2*i+1] are the lower
    // and upper wildcard count between item i and item i+1.
    const vector <int>  OutWildcardNumber = ProDataPositive.GetWildcardNumber();
    const int LastIndex = static_cast<int>(OutPrefix.size()) - 1;

    for (int PrefixIndex = 0; PrefixIndex <= LastIndex; PrefixIndex++) {
      outfSeq << OutPrefix[PrefixIndex];
      if (PrefixIndex == LastIndex)
        continue;                       // no gap after the last item

      const int LowerWC = OutWildcardNumber[PrefixIndex*2];
      const int UpperWC = OutWildcardNumber[PrefixIndex*2+1];
      if (LowerWC < UpperWC) {
        // Flexible gap, e.g. x(2,4)
        outfSeq << "x(" << LowerWC << "," << UpperWC << ")";
      }
      else if (LowerWC == UpperWC) {
        // Fixed gap: one 'x' per wildcard
        for (int WilcardOutIndex = 0; WilcardOutIndex < LowerWC; WilcardOutIndex++)
          outfSeq << "x";
      }
      else {
        // Only reachable when the lower bound exceeds the upper bound.
        cout << "The first wildcard number is bigger than the second one !!" << endl;
      }
    }
    // endl is kept on purpose: each reported pattern is flushed to the file.
    outfSeq << "\t\t" << SupportPositive << " - " << SupportNegative << " = " << SupportPositive - SupportNegative << endl;
  }

  // Try every item of the alphabet with every admissible wildcard range.
  for (string::size_type AAIndex = 0; AAIndex < AASubType.size(); AAIndex++) {
    for (int AmbiguIndex = 0; AmbiguIndex <= Max_Ambigu; AmbiguIndex++) {
      for (int WildcardNumberLowerBoundaryIndex = 0;
               WildcardNumberLowerBoundaryIndex <= Max_Consective_WC;
               WildcardNumberLowerBoundaryIndex++) {
        const int WildcardNumberUpperBoundary = WildcardNumberLowerBoundaryIndex + AmbiguIndex;
        if (WildcardNumberUpperBoundary > Max_Consective_WC)
          continue;                     // upper bound would exceed the limit

        ProjectedDatabase TempProDataPositive;
        ProjectedDatabase TempProDataNegative;
        TempProDataPositive.UpdateProData(ProDataPositive,
                                          AASubType[AAIndex],
                                          SeqPositive,
                                          WildcardNumberLowerBoundaryIndex,
                                          WildcardNumberUpperBoundary);
        TempProDataNegative.UpdateProData(ProDataNegative,
                                          AASubType[AAIndex],
                                          SeqNegative,
                                          WildcardNumberLowerBoundaryIndex,
                                          WildcardNumberUpperBoundary);

        mining(TempProDataPositive,
               SeqPositive,
               TempProDataNegative,
               SeqNegative,
               AASubType,
               Min_Pat_Length,
               Min_Sup_Diff,
               Max_Consective_WC,
               Max_Ambigu,
               Min_Evaluate,
               outfSeq);
      }
    }
  }

}








