#!/usr/bin/env perl

=pod

=head1 NAME

footprintdb-rest

=head1 DESCRIPTION

Scans protein sequences against https://footprintdb.eead.csic.es databases to predict 
recognized DNA motifs based on i) overall and ii) binding interface sequence similarity.
Depends on curl to query the REST service http://footprintdb.eead.csic.es:8080/protein

=head1 AUTHORS

=over

=item Bruno Contreras Moreira <bcontreras@eead.csic.es>

=back

=head1 CATEGORY

genomics

=head1 USAGE

 footprintdb-rest -seq protein.fasta
 
 footprintdb-rest -seq protein.fasta -db JASPAR

=head1 INPUT FORMAT

File with protein sequences in FASTA format. 

=head1 OUTPUT FORMAT

Predicted DNA motifs in TRANSFAC format.

=head1 EXAMPLES

=head2 Quick demo

Option I<-demo> runs a simple demo with one transcription factor sequence

 footprintdb-rest -demo     

=cut

BEGIN {
  # Derive the directory part of the script path by stripping the trailing
  # run of characters that are not '(', '/' or ')', then register the
  # sibling lib/ directory so that RSA.lib can be found by require.
  my $script_dir = $0;
  if ($script_dir =~ s{[^(/)]+$}{}) {
    push(@INC, $script_dir . "lib/");
  }
}

require "RSA.lib";

################################################################
## Main package
##
## Builds a JSON object {"identifier":"sequence",...} from the input FASTA
## file (or from the built-in demo sequence), POSTs it with curl to the
## footprintDB REST service and prints the returned motifs, optionally
## keeping only those that match the -db pattern.
package main;
{

  ################################################################
  #### initialise parameters and vars
  our $start_time = &RSAT::util::StartScript();

  my $server = 'http://footprintdb.eead.csic.es:8080/protein'; #/echo useful while debugging

  # curl invocation kept as an argument list so that it is executed without
  # a shell: the JSON payload can then contain quotes or any other shell
  # metacharacter safely. "--stderr -" sends curl diagnostics to the same
  # stream that is read below.
  my @curl_cmd = ('curl', '--no-progress-meter', '--stderr', '-',
                  '-X', 'POST', '-H', 'Content-Type: application/json');

  our %outfile = ();
  our %params = ('json' => '');

  our $out = \*STDOUT;
  our $verbose = 0;

  # parse arguments
  &ReadArguments();

  if ($params{'seq'}) {
    $params{'json'} = _fasta_to_json($params{'seq'});

  } elsif ($params{'demo'}) {
    $params{'json'} = '{"demo":"IYNLSRRFAQRGFSPREFRLTMTRGDIGNYLGLTVETISRLLGRFQKSGMLAVKGKYITIEN"}';

  } else {
    RSAT::error::FatalError("You must either specify an input FASTA file (-seq) or run the demo (-demo)");
  }

  if ($main::verbose >= 1) {
    Verbose();
  }

  # open output stream
  $main::out = OpenOutputFile($main::outfile{'output'});

  # actually perform REST request
  my @request = (@curl_cmd, '-d', $params{'json'}, $server);
  open(my $rest, '-|', @request)
    || RSAT::error::FatalError("cannot run @request");

  # Lines are accumulated until a line starting with "//" is read, which
  # is taken as the end of one motif record.
  my $motif = '';
  while (my $line = <$rest>) {

    $motif .= $line;

    if ($line =~ m{^//}) {
      if ($params{'db'}) {
        print $out $motif if ($motif =~ m/$params{'db'}/i);
      } else {
        print $out $motif;
      }

      # init new motif
      $motif = '';
    }
  }

  # close() on a pipe returns false when the command exits with a non-zero
  # status. Whatever is left in $motif at that point is unterminated output,
  # which includes any curl diagnostics, so it is reported instead of being
  # silently dropped.
  unless (close($rest)) {
    my $status = $? >> 8;
    $motif =~ s/\s+$//;
    RSAT::error::FatalError("REST request to $server failed (curl exit status $status): $motif");
  }

  close_and_quit();
}

################################################################
## Read a protein FASTA file and return its content as a JSON object
## string of the form {"id1":"SEQ1","id2":"SEQ2"}.
##
## Identifiers are the first whitespace-delimited word after '>'.
## Sequence lines have all whitespace (including a trailing carriage
## return) removed. Each identifier is emitted once, in order of first
## appearance. Aborts through RSAT::error::FatalError if the file cannot
## be read, a header has no identifier, or no sequence is found.
sub _fasta_to_json {
  my ($fasta_file) = @_;

  my ($id, @ids, %seq_of);

  open(my $faa, '<', $fasta_file)
    || RSAT::error::FatalError("cannot read $fasta_file");
  while (my $line = <$faa>) {
    $line =~ s/[\r\n]+$//;

    if ($line =~ /^>(\S+)/) {
      $id = $1;
      unless (exists($seq_of{$id})) {
        push(@ids, $id);
        $seq_of{$id} = '';
      }

    } elsif ($line =~ /^>/) {
      RSAT::error::FatalError("FASTA header without identifier in $fasta_file: $line");

    } elsif (defined($id)) {
      # residues found before the first header have no identifier to be
      # stored under and are skipped
      $line =~ s/\s+//g;
      $seq_of{$id} .= $line;
    }
  }
  close($faa);

  # without this check an empty file would produce the invalid payload "}"
  unless (@ids) {
    RSAT::error::FatalError("no sequences found in $fasta_file");
  }

  my @members = map {
    sprintf('"%s":"%s"', _json_escape($_), _json_escape($seq_of{$_}))
  } @ids;

  return '{' . join(',', @members) . '}';
}

################################################################
## Escape a string so that it can be placed between double quotes in a
## JSON document: backslash and double quote are backslash-escaped and
## control characters are written as \uXXXX.
sub _json_escape {
  my ($text) = @_;
  $text =~ s/(["\\])/\\$1/g;
  $text =~ s/([\x00-\x1f])/sprintf('\\u%04x', ord($1))/ge;
  return $text;
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################

################################################################
### Close output file and quit
###
### When verbosity is at least 1 the execution time report is printed to
### standard output. If an output file was requested with -o, its handle
### is closed explicitly so that buffered write errors are reported
### instead of being lost at exit. Never returns: the script exits with
### status 0.
sub close_and_quit {
  if ($main::verbose >= 1) {
    my $exec_time = &RSAT::util::ReportExecutionTime($main::start_time);
    print $exec_time;
  }

  # only close handles this script asked for; STDOUT is left alone
  if ($main::outfile{'output'}) {
    close($main::out)
      || RSAT::error::FatalError("cannot close output file $main::outfile{'output'}: $!");
  }

  exit(0);
}

################################################################
#### display full help message
####
#### Renders the POD embedded in this script with pod2text and exits.
sub PrintHelp {
    # List form of system() bypasses the shell, so a script path that
    # contains spaces or shell metacharacters is passed to pod2text intact.
    if (system('pod2text', '-c', $0) != 0) {
        warn "could not run pod2text on $0\n";
    }
    exit();
}

################################################################
#### display short help message
####
#### This script has no separate short help: the call is forwarded to
#### PrintHelp, which prints the full help and exits.
sub PrintOptions {
    PrintHelp();
}

################################################################
#### Read arguments
####
#### Parses a copy of @ARGV (which is left untouched) and stores the
#### recognised options in %main::params, %main::outfile and
#### $main::verbose. -h and -help print the help and exit. Options that
#### are not recognised are reported on STDERR and skipped.
sub ReadArguments {
  my $arg = "";

  my @arguments = @ARGV;

  # test definedness rather than truth, so that an argument such as "0"
  # or the empty string does not end the parsing prematurely
  while (defined($arg = shift(@arguments))) {

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution).
If no number follows B<-v>, the level is set to 1.

=cut

    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }

=pod

=item B<-seq input sequence file>

FASTA file with protein sequences

=cut

    } elsif ($arg eq '-seq') {
      $main::params{'seq'} = shift(@arguments);

=pod

=item B<-db database>

Take only motifs from this footprintDB database, see https://footprintdb.eead.csic.es/index.php?databases

=cut

    } elsif ($arg eq '-db') {
      $main::params{'db'} = shift(@arguments);

=pod

=item   B<-o outputfile>

If no output file is specified, the standard output is used.  This
allows the command to be used within a pipe.

=cut

    } elsif ($arg eq "-o") {
      $main::outfile{'output'} = shift(@arguments);

=pod

=item B<-demo>

Run demo

=cut

    } elsif ($arg eq '-demo') {
      $main::params{'demo'} = 1;

=pod

=item B<-h>

Display full help message

=cut

    } elsif ($arg eq "-h") {
      &PrintHelp();

=pod

=item B<-help>

Same as -h

=cut

    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=back

=cut

    } else {
      # a mistyped option used to be dropped silently; parsing still goes
      # on so that existing command lines keep working
      warn "; WARNING: ignoring unknown option '$arg'\n";
    }
  }
}

################################################################
#### verbose message
####
#### Prints to $main::out a ';'-prefixed header with the program name and
#### its arguments, followed by one line per parameter: the input sequence
#### file, the database filter, the output file and the JSON payload.
sub Verbose {
  # program name spelled as in the NAME section (was "footprintpdb-rest")
  print $main::out "; footprintdb-rest ";
  &PrintArguments($main::out);

  my @report = (
    ['seq',    $main::params{'seq'}],
    ['db',     $main::params{'db'}],
    ['output', $main::outfile{'output'}],
  );
  foreach my $entry (@report) {
    printf $main::out "; %-21s\t%s\n", $entry->[0], $entry->[1];
  }

  # the last line is followed by a blank line that ends the header
  printf $main::out "; %-21s\t%s\n\n", "json", $main::params{'json'};
}

__END__

