#!/usr/bin/perl -w

#SMILE v1.47 - Extraction of structured motifs common to several sequences
#Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr)

#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.




use strict;

# File-level state shared by every step of the script (parsing, validation,
# extraction launch and evaluation launch).
my (@tmplist, $data_file, $quorum, @lmin, @lmax, @e, $blocs, $curbloc,
    $i, @compo, @flag_compo, @saut_min, @saut_max, @comline, $output_file,
    $prog, $ok, $shufflings, @comline2, @delta, $flag_delta, $ordre, $contre,
    @symbcompo, $alpha_file, $summinblocs, $summaxblocs, $flagmaxblocinfini,
    @palindrom);
# Scratch variables used further down (current line read inside a BOX
# section or from the name file, and inner loop index). They used to be
# undeclared package globals.
my ($line, $j);
# flag_compo[i] is the number of composition constraints read for box i;
#   index 0 holds the global constraints (it stays undefined when box i has
#   no composition constraint at all).
# symbcompo[i][j] is the symbol of the j-th composition constraint of box i
#   (0 for global) and compo[i][j] is the value read for that constraint
#   (according to the original note: the maximum number of occurrences of
#   that symbol in box i).

# Paths of the executable programs ##############################################
my $prog1 = "../P_BLOCS/bin/x-smile";
my $prog2 = "../P_BLOCS+DELTA/bin/x-smile_delta";
my $progstat = "../SigStat/bin/e-smile_shuffling";
my $progfaux = "../SigStat/bin/e-smile_against";

# File holding the names of the generated files #################################
my $namefile = ".namefile";

print "\n*********************\n  O o\n   ^    SMILE! v1.47\n  \\_/\n*********************\n\n";


# Called without any argument: show the usage message and stop.
unless ( @ARGV )
    {
    print "\nUsage: smile [ [-x] <parameter file> | -g <nb> ]\n",
          "\t\t-g <nb> print a generic parameter file for <nb> boxes\n\n",
          "\t\t-x avoid first step of extraction (if already done and output file available)\n\n";
    exit;
    }


## Generation of the generic parameter file (-g <nb>) ##########################
# Prints on standard output a parameter file template for <nb> boxes, then
# exits. Nothing else of the script runs in this mode.
if ( $ARGV[0] eq "-g" )
    {
    $blocs = $ARGV[1];

    # The number of boxes must be given and be a strictly positive integer.
    unless ( $blocs && $blocs =~ /^\d+$/ && $blocs > 0 )
        {
        print "\nUsage: smile [ [-x] <parameter file> | -g <nb> ]\n",
              "\t\t-g <nb> print a generic parameter file for <nb> boxes\n\n",
              "\t\t-x avoid first step of extraction (if already done and output file available)\n\n";
        exit;
        }

    # Step 1 header and global parameters.
    print "EXTRACTION (Step 1) =======================================================\n",
          "FASTA file\t\t\tfile_name\n",
          "Output file\t\t\tfile_name_out\n\n",
          "GLOBAL PARAMETERS =============\n",
          "Alphabet file\t\t\tfile_name_alphabet\n",
          "Quorum\t\t\t\t0\n",
          "Total min length\t\t0\n",
          "Total max length\t\t0\n",
          "Total substitutions\t\t0\n",
          "Boxes\t\t\t\t$blocs\n\n",
          "Composition in ?\t\t0\t# OPTIONAL #\n\n\n";

    # One section per box.
    for ( $i = 1; $i <= $blocs; $i++ )
        {
        # Per-box lengths only make sense when there are several boxes.
        if ( $blocs != 1 )
            {
            print "BOX $i ================\n",
                  "Min length\t\t\t0\n",
                  "Max length\t\t\t0\n",
                  "Substitutions\t\t\t0\n",
                  "Composition in ?\t\t0\t# OPTIONAL #\n";
            }

        # Every box but the last one is followed by a spacer.
        if ( $i != $blocs )
            {
            print "Min spacer length\t\t0\n",
                  "Max spacer length\t\t0\n",
                  "Delta\t\t\t\t0\t# OPTIONAL #\n";
            }

        # Every box but the first one may be the palindrom of a previous box.
        if ( $i != 1 )
            {
            print "Palindrom of box\t\t0\t# OPTIONAL #\n";
            }

        print "\n";
        }

    # Step 2 (evaluation) parameters.
    print "\n\nEVALUATION (Step 2) ===================================================\n",
          "Shufflings\t\t\t100\n",
          "Size k-mer\t\t\t0\n",
          "#     OR\n",
          "Against wrong sequences\t\tfile_name_wrong_seqs\n\n";
    exit;
    }
###############################################################################
    

# Open the parameter file on the FILE handle (read by the parsing loop below).
# With -x the parameter file is the second argument, otherwise the first one.
# The file name is kept in its own lexical so that $blocs (the number of
# boxes) is not polluted by it when the file has no 'Boxes' line.
    {
    my $param_file;

    if ( $ARGV[0] eq "-x" )
        {
        $param_file = $ARGV[1];

        # -x given without a parameter file: show the usage and stop.
        if (!$param_file)
            {
            print "\nUsage: smile [ [-x] <parameter file> | -g <nb> ]\n";
            print "\t\t-g <nb> print a generic parameter file for <nb> boxes\n\n";
            print "\t\t-x avoid first step of extraction (if already done and output file available)\n\n";
            exit;
            }
        }
    else
        {
        $param_file = $ARGV[0];
        }

    -r $param_file or print "$param_file is not readable!\n" and exit;

    # Three-argument open: the name is never interpreted as a mode or a pipe,
    # and a failure is reported instead of being silently ignored.
    open(FILE, '<', $param_file)
        or print "$param_file is not readable! ($!)\n" and exit;
    }


# Parse the parameter file line by line.
# Each recognised line is "<keyword words> <value>"; the line is split on
# whitespace and the value is taken at the index following the keyword words.
# A "BOX <n>" line opens a box section which is read by an inner loop up to
# the next empty line (or the end of the file). Unrecognised lines are
# silently ignored.
while ( <FILE> )
    {
    if ( /FASTA file/ )
        {
        @tmplist = split(" ");
        $data_file = $tmplist[2];

        -r "$data_file" or print "Line $.: $data_file is not readable!\n" and exit;
        }
    elsif ( /Output file/ )
        {
        @tmplist = split(" ");
        $output_file = $tmplist[2];
        }
    elsif ( /Alphabet file/ )
        {
        @tmplist = split(" ");
        $alpha_file = $tmplist[2];
        }
    elsif ( /Quorum/ )
        {
        @tmplist = split(" ");
        $quorum = $tmplist[1];
        }
    elsif ( /Total min length/ )
        {
        # Index 0 of @lmin / @lmax / @e holds the global (total) values.
        @tmplist = split(" ");
        $lmin[0] = $tmplist[3];
        }
    elsif ( /Total max length/ )
        {
        @tmplist = split(" ");
        $lmax[0] = $tmplist[3];
        }
    elsif ( /Total substitutions/ )
        {
        @tmplist = split(" ");
        $e[0] = $tmplist[2];
        }
    elsif ( /Boxes/ )
        {
        @tmplist = split(" ");
        $blocs = $tmplist[1];

        if($blocs <= 0)
            { print "Line $.: incorrect number of boxes.\n" and exit; }
        }
    elsif ( /Shufflings/ )
        {
        @tmplist = split(" ");
        $shufflings = $tmplist[1];
        }
    elsif ( /Size k-mer/ )
        {
        @tmplist = split(" ");
        $ordre = $tmplist[2];
        }
    elsif ( /Against wrong sequences/ )
        {
        @tmplist = split(" ");
        $contre  = $tmplist[3];
        }
    elsif ( /BOX/ )
        {
        # Start of a box section: "BOX <n> ====".
        @tmplist = split(" ");
        $curbloc = $tmplist[1];

        if ( ! defined($blocs) )
            { print "Line $.: no number of boxes has been defined before\n" and exit; }

        if ( (!$curbloc) || ($curbloc <= 0) || ($curbloc > $blocs) )
            { print "Line $.: incorrect box number\n" and exit; }

        if ( $blocs == 1 )
            { print "Line $.: no need of a 'box' section if only 1 box\n" and exit; }


        # Read the parameters of this box until an empty line or end of file.
        while ( defined($line = <FILE>) && ($line ne "\n") )
            {
            if ( $line =~ /Min length/ )
                {
                @tmplist = split(" ", $line);
                $lmin[$curbloc] = $tmplist[2];
                }
            elsif ( $line =~ /Max length/ )
                {
                @tmplist = split(" ", $line);
                $lmax[$curbloc] = $tmplist[2];
                }
            elsif ( $line =~ /Substitutions/ )
                {
                @tmplist = split(" ", $line);
                $e[$curbloc] = $tmplist[1];
                }
            elsif ( $line =~ /Min spacer length/ )
                {
                # The spacer of box n separates it from box n+1, so the last
                # box has none.
                if ( $curbloc == $blocs )
                    { print "Line $.: the last box can't have jump parameters\n" and exit; }

                @tmplist = split(" ", $line);
                $saut_min[$curbloc] = $tmplist[3];
                }
            elsif ( $line =~ /Max spacer length/ )
                {
                if ( $curbloc == $blocs )
                    { print "Line $.: the last box can't have jump parameters\n" and exit; }

                @tmplist = split(" ", $line);
                $saut_max[$curbloc] = $tmplist[3];
                }
            elsif ( $line =~ /Delta/ )
                {
                if ( $curbloc == $blocs )
                    { print "Line $.: the last box can't have a delta parameter\n" and exit;}

                @tmplist = split(" ", $line);
                $delta[$curbloc] = $tmplist[1];
                # One delta anywhere switches the whole run to the delta
                # variant of the extraction program.
                $flag_delta      = 1;
                }
            elsif ( $line =~ /Palindrom of box/ )
                {
                @tmplist = split(" ", $line);
                $palindrom[$curbloc] = $tmplist[3];
                # An invalid box number is fatal like every other parameter
                # error: going on would index @lmin/@lmax with 0 or a
                # negative number in the length comparison below.
                if ( $palindrom[$curbloc]<=0 )
                    { print "Line $.: numro de bloc de palindrome incorrect\n" and exit;}
                if ($curbloc <= $palindrom[$curbloc])
                    { print "Line $.: as we consider boxes from left to right, a box can only be the palindrom of a previous box\n" and exit;}
                if ( $lmin[$curbloc] != $lmin[$palindrom[$curbloc]] 
                        || $lmax[$curbloc] != $lmax[$palindrom[$curbloc]] )
                    { print "Line $.: min and max lenght of boxes $curbloc and $palindrom[$curbloc] must be identical if they're palindroms\n" and exit;}

                }
            elsif ( $line =~ /Composition/ )
                {
                # "Composition in <symbol> <value>": append one constraint to
                # the list of this box; flag_compo counts them.
                if( !defined($flag_compo[$curbloc]))
                    { $flag_compo[$curbloc] = 0; }
                @tmplist = split(" ", $line);
                $symbcompo[$curbloc][$flag_compo[$curbloc]]="$tmplist[2]";
                $compo[$curbloc][$flag_compo[$curbloc]] = $tmplist[3];
                $flag_compo[$curbloc] += 1;
                }
            else
                { 
                # Unknown box parameter: ignored.
                chomp($line);
                #print "Line $.: box parameter ignored: \"$line\"\n";
                }
                
            }
        }
    elsif ( /Composition/ )
        {
        # Global composition constraint (outside any box section): index 0.
        if( !defined($flag_compo[0]))
            { $flag_compo[0] = 0; }
        @tmplist = split(" ");
        $symbcompo[0][$flag_compo[0]] = "$tmplist[2]";
        $compo[0][$flag_compo[0]] = $tmplist[3];
        $flag_compo[0] += 1;
        }
    elsif ( ! /^\n$/ )
        {
        # Unknown non-empty line: ignored.
        chomp;
        #print "Line $. ignored : \"$_\"\n";
        }
    }

# The parameter file has been read completely.
close(FILE);



# Checking and preparing the parameters ########################################
# Every failed check prints an explanation and stops the script.
# A max length of 0 is accepted everywhere and is treated as "no upper bound"
# by the comparisons below.
if ( !defined($alpha_file) )
    { print "Alphabet file hasn't been defined!\n(Add an 'Alphabet file' line to your parameter file)\n" and exit; }
if ( !defined($data_file) )
    { print "Fasta file hasn't been defined!\n(Add a 'FASTA file' line to your parameter file)\n" and exit; }
if ( !defined($output_file) )
    { print "Output file hasn't been defined!\n(Add an 'Output file' line to your parameter file)\n" and exit; }
if ( !defined($quorum) || $quorum <= 0 || $quorum > 100 )
    { print "Quorum is incorrect or not defined.\n(Add a 'Quorum' line to your parameter file. It's a percentage)\n" and exit; }
if ( !defined($lmin[0]) || $lmin[0] <= 0 )
    { print "Total min length is incorrect or not defined.\n(Add a 'Total min length' line to your parameter file)\n" and exit; }
if ( !defined($lmax[0]) || ( ( $lmax[0] != 0) && ($lmax[0] < $lmin[0] ) ) )
    { print "Total max length is incorrect or not defined.\n(Add a 'Total max length')\n" and exit; }
if ( !defined($e[0]) || $e[0] < 0 || ( ($lmax[0] != 0) && ($e[0] >= $lmax[0]) ) )
    { print "Total substitions number is incorrect or not defined.\n(Add a 'Total substitutions' line to your parameter file)\n" and exit; }
if ( !defined($ordre) && defined($shufflings))
    { print "The shuffling order hasn't been defined.\n(Add a 'Size k-mer' line to your parameter file)\n" and exit; }
if ( defined($ordre) && defined($shufflings) && $shufflings != 0 && $ordre <1)
    { print "Shuffling order must be greater than 0.\n" and exit; }
if ( defined($contre) && (defined($ordre) || defined($shufflings) ) )
    { print "Shuffling and Against methods are not compatible.\n" and exit; }

# Without a 'Boxes' line the motif is made of a single box.
if (!defined ($blocs))
    { $blocs = 1; }

if( $blocs > 1 )
    {
    $summinblocs =  0;
    $summaxblocs =  0;
    $flagmaxblocinfini = 0;
    for ($i=1; $i <= $blocs; $i++)
        {
        # Validate the box first, so that an undefined value is reported by
        # its own message before it is used in any computation.
        if ( !defined($lmin[$i]) || $lmin[$i] < 0 )
            { print "Min length for box $i is incorrect or not defined.\n(Line 'Min length')\n" and exit; }
        if ( !defined($lmax[$i]) || ( ($lmax[$i] != 0) && ($lmax[$i] < $lmin[$i]) ) )
            { print "Max length for box $i is incorrect or not defined.\n(ligne 'Max length')\n" and exit; }
        if ( !defined($e[$i]) || $e[$i] <0 || ( ($lmax[$i] != 0) && ($e[$i]>=$lmax[$i]) ) )
            { print "Substitutions number for box $i is incorrect or not defined.\n(Line 'Substitutions')\n" and exit; }
        if ( $i != $blocs && (!defined($saut_min[$i]) || $saut_min[$i]<0) )
            { print "Min jump for box $i is incorrect or not defined.\n(Line 'Min spacer length')\n" and exit; }
        if ( $i != $blocs && (!defined($saut_max[$i]) || $saut_max[$i]<$saut_min[$i]) )
            { print "Max jump for box $i is incorrect or not defined.\n(Line 'Max spacer length')\n" and exit; }

        # Accumulate the box lengths for the consistency checks against the
        # total lengths done after the loop. One unbounded box (max 0) makes
        # the sum of the max lengths meaningless.
        if( $lmax[$i]==0 )
            { $flagmaxblocinfini = 1; }
        else
            { $summaxblocs += $lmax[$i]; }

        $summinblocs += $lmin[$i];

        # As soon as one box has a delta, every box followed by a spacer
        # needs one, and twice the delta must fit in the spacer range.
        if ( defined($flag_delta) && ($i != $blocs) )
            {
            if (defined($delta[$i]))
                {
                if ( $delta[$i] < 0 || $saut_max[$i]-$saut_min[$i] < $delta[$i]*2 )
                    { print "Delta for bloc $i is incorrect : not compatible with spacer range.\n(Line 'Delta')\n"
                    and exit; }
                }
            else
                {print "Box $i has no delta and another box has one.\n(Line 'Delta'. Fix a delta equal to the spacer range if you don't want to use it for this box.)\n"
                and exit;}
            }
        }

    if($lmax[0]!=0  && $summinblocs > $lmax[0])
        { print "Sum of min length of boxes is greater than total max length\n" and exit; }
    if(!$flagmaxblocinfini && $summaxblocs < $lmin[0])
        { print "Sum of max length of boxes is lower than total min length\n" and exit; }
    }



# RUNNING THE EXTRACTION PROGRAM (step 1) ######################################
# Builds the command line of the extraction program and runs it.
# The whole step is skipped with -x (extraction already done, output file
# available); the script then goes straight on to the evaluation step.
unless ( $ARGV[0] eq "-x" )
    {
    -r $data_file or print "$data_file is not readable!\n" and exit;

    # The delta variant of the program is used as soon as one box has a delta.
    if (defined($flag_delta))
        { @comline = ($prog2); }
    else
        { @comline = ($prog1); }

    # Global parameters.
    push @comline, ($alpha_file,$data_file,$output_file,$quorum,$blocs,$lmin[0],$lmax[0],$e[0]);

    if ($blocs > 1)
        {
        # Every box but the last: lengths, substitutions, spacer range and,
        # in delta mode, the delta.
        for my $box (1 .. $blocs-1)
            {
            push @comline, ($lmin[$box],$lmax[$box],$e[$box],$saut_min[$box],$saut_max[$box]);

            if (defined($flag_delta))
                {push @comline, ($delta[$box]);}
            }
        # The last box has no spacer.
        push @comline, ($lmin[$blocs],$lmax[$blocs],$e[$blocs]);
        }

    # Composition constraints: "<box> <count>" followed by one
    # "<symbol> <value>" pair per constraint. Box 0 is the global constraint;
    # with a single box only the global constraint is passed on.
    my $last_compo_box = ($blocs > 1) ? $blocs : 0;
    for my $box (0 .. $last_compo_box)
        {
        next unless defined($flag_compo[$box]) && $flag_compo[$box] != 0;

        push @comline, ($box,$flag_compo[$box]);
        for my $k (0 .. $flag_compo[$box]-1)
            {
            push @comline, ($symbcompo[$box][$k],$compo[$box][$k]);
            }
        }

    # Palindrom constraints, one "p<box>/<previous box>" argument each.
    if ($blocs > 1)
        {
        for my $box (1 .. $blocs)
            {
            if(defined($palindrom[$box]))
                {
                push @comline, ("p$box/$palindrom[$box]");
                }
            }
        }

    # Disabled in the original script: interactive confirmation before
    # overwriting an existing file.
    #if ( -e "$comline[2]" )
    #   {
    #   print "WARNING! File '$comline[2]' already exists!\n";
    #   print "Type 'ok' to continue...\n";
    #   $i = <STDIN>;
    #   chomp($i);
    #   if ($i ne "ok")
    #       { print "Execution interrupted.\n" and exit; }
    #
    #    unless ( -w "$comline[2]" )
    #        { print "File '$comline[2]' cannot be opened for writing.\n" and exit; }
    #   }

    print "@comline\n";
    $ok = system @comline;

    if ( $ok != 0 )
        {
        print "Error during execution of extraction (code $ok)\n";
        # system returns -1 when the program could not be started at all;
        # 65280 (exit status 255) is the value the original script tested for.
        if ( $ok == -1 || $ok == 65280 )
            { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n"; }

        exit;
        }
    }



suite:

## AGAINST: evaluation against a set of wrong sequences ########################
# Runs the 'against' program on the extraction result(s) and exits.
# In delta mode the result file names are read, one per line, from the name
# file and the program is run once per result file; otherwise it is run once
# on the output file.
if( defined($contre) )
    {
    # Runs the command held in @comline2. When the result file
    # ($comline2[3]) already exists, the user must type 'ok' first and the
    # file must be writable. Any failure stops the script.
    my $run_against = sub
        {
        if ( -e "$comline2[3]" )
            {
            print "WARNING! '$comline2[3]' already exists!\n";
            print "Type 'ok' to continue...\n";
            my $answer = <STDIN>;
            # End of input counts as a refusal.
            $answer = "" unless defined($answer);
            chomp($answer);
            if ($answer ne "ok")
                { print "Execution interrupted.\n" and exit; }

            unless ( -w "$comline2[3]" )
                { print "'$comline2[3]' is not writable!\n" and exit; }
            }

        $ok = system @comline2;

        if ( $ok != 0 )
            {
            print "Error during execution of evaluation (code $ok)\n";
            # system returns -1 when the program could not be started at
            # all; 65280 (exit status 255) is the value originally tested.
            if ( $ok == -1 || $ok == 65280 )
                { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n"; }
            exit;
            }
        };

    $comline2[0] = $progfaux;
    $comline2[1] = $contre;
    $comline2[2] = $output_file;
    $comline2[3] = $output_file.".against";

    if (defined($flag_delta))
        {
        -r $namefile or print "$namefile is not readable!\n" and exit;

        open(NAMEFILE, '<', $namefile)
            or print "$namefile is not readable! ($!)\n" and exit;

        while ( defined(my $result_file = <NAMEFILE>) )
            {
            chomp($result_file);
            $comline2[2] = $result_file;
            $comline2[3] = $result_file.".against";

            -r $result_file or print "$result_file is not readable!\n" and exit;

            print "\n*** AGAINST '$result_file'\n@comline2\n";

            $run_against->();
            }

        close(NAMEFILE);
        exit;
        }


    print "\n*** AGAINST '$comline2[1]'\n@comline2\n";

    -r $contre or print "$contre is not readable!\n" and exit;

    $run_against->();

    exit;
    }




# Nothing more to do when no shuffling has been requested.
if ( !defined($shufflings) || $shufflings == 0 )
    { exit; }

# STATS: evaluation by shuffling ###############################################
# Runs the shuffling program on the extraction result(s).
# In delta mode the result file names are read, one per line, from the name
# file and the program is run once per result file; otherwise it is run once
# on the output file.

# Runs the command held in @comline2. When the result file ($comline2[3])
# already exists, the user must type 'ok' first and the file must be
# writable. Any failure stops the script.
my $run_shuffling = sub
    {
    if ( -e "$comline2[3]" )
        {
        print "WARNING! '$comline2[3]' already exists!\n";
        print "Type 'ok' to continue...\n";
        my $answer = <STDIN>;
        # End of input counts as a refusal.
        $answer = "" unless defined($answer);
        chomp($answer);
        if ($answer ne "ok")
            { print "Execution interrupted.\n" and exit; }

        # This is a writability test, so the message says "writable".
        unless ( -w "$comline2[3]" )
            { print "'$comline2[3]' is not writable!\n" and exit; }
        }

    $ok = system @comline2;

    if ( $ok != 0 )
        {
        print "Error during execution of evaluation (code $ok)\n";
        # system returns -1 when the program could not be started at all;
        # 65280 (exit status 255) is the value originally tested.
        if ( $ok == -1 || $ok == 65280 )
            { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n"; }
        exit;
        }
    };

$comline2[0] = $progstat;
$comline2[1] = $data_file;
$comline2[2] = $output_file;
$comline2[3] = $output_file.".shuffle";
$comline2[4] = $shufflings;
$comline2[5] = $ordre;

if (defined($flag_delta))
    {
    -r $namefile or print "$namefile is not readable!\n" and exit;

    open(NAMEFILE, '<', $namefile)
        or print "$namefile is not readable! ($!)\n" and exit;

    while ( defined(my $result_file = <NAMEFILE>) )
        {
        chomp($result_file);
        $comline2[2] = $result_file;
        $comline2[3] = $result_file.".shuffle";

        -r $result_file or print "$result_file is not readable!\n" and exit;

        print "\n*** SHUFFLING '$result_file'\n@comline2\n";

        $run_shuffling->();
        }

    close(NAMEFILE);
    exit;
    }


print "\n*** SHUFFLING '$comline2[2]'\n@comline2\n";
-r $output_file or print "$output_file is not readable!\n" and exit;

$run_shuffling->();
