#!/usr/bin/env perl
=pod

=head1 NAME

network-interactions

=head1 VERSION

version 1

=head1 DESCRIPTION

Constructs a Gene Network, mainly a Gene Regulatory Network, based on the
output of <matrix-scan> from a list of genes and transcriptional factors of
interest.

=head1 AUTHORS

mpadilla@lcgej.unam.mx

=head1 CATEGORY

=over

=item network tool

=back

=head1 USAGE

network-interactions -tfs tfslistfile -cre crebedfile -genome genome
    [-report_net] [-net networkfile]

=head1 INPUT FORMATS

=head2 tfs list file

A one-column file with all transcriptional factors of interest.

=head2 cre bed file

A bed file referring all regulatory regions of all genes of interest
(including TFs).

=head2 network file

A tab-delimited, two-column file indicating all two-node interactions,
one per row.

=head1 OUTPUT FORMATS

=head2 complete_direct_interactions_date.tsv

Two-column, tab-delimited file indicating all direct interactions found
within all genes specified in bed file of cis-regulatory regions. Each
interaction is outputted in a single row where the first one (a TF)
regulates the second one (any gene).

=head2 GRN_direct_interactions_date.tsv

Gene Regulatory Network direct interactions.
Two-column, tab-delimited file indicating all TFs' direct interactions,
each interaction is specified in a row where the first one regulates the
second one.

=head2 GRN_indirect_interactions_date.tsv

Gene Regulatory Network indirect interactions.
Because the program only founds direct interactions, this file indicates
all direct interactions composed by 3 nodes in each row.
E.g.
       TFx ==> TFy ==> TFz
would be read as:
TFx regulates TFy and  TFy regulates TFz, so TFx indirectly regulates TFz.

If a TF has no direct targets, hence no indirect targets, it is
specified as: "TFx has no targets". If this given TF is a direct
target of another TF, then it will simply don't have an arrow
following another TF.
E.g.1   TFx ==> TFy, in this case TFy has no targets.

Cases of a TF regulating theirselves are only specified once, as a
two-node interaction.
E.g.    TFx ==> TFx

=head2 Network Comparision (-report_net)

When an input network is given, a comparision is made between the input network
and the one generated by the program (complete_direct_interactions_date.tsv).
This comparision is made taking into account all genes refered to in the
input bed file and will generate the three following output files:

=head2 network_intersection_date.tsv

A two-column, tab-delimited file containing, one per row, all the common
interactions between the input network and the network of all genes
generated by the program.

=head2 network_rejected_interactions_date.tsv

A two-column, tab-delimited file containing, one per row, all the
interactions found in the input network but not re-found in the output
network. (The assymetric difference for input network)
An interaction can be "rejected" for several reasons but this
basically happens when, given an interaction between two genes in the input
network, the matrix describing the binding site of first one  (a TF) is not
found (by matrix-scan) in the regulatory region of the second one
(the target gene).

=head2 network_found_interactions_date.tsv

A two-column, tab-delimited file containing, one per row, all the interactions
found by the program but that were not specified in the input network.
(The assymetric difference for the complete network).


=head1 SEE ALSO

=head1 WISH LIST

=over

=item B<wish 1>

=item B<wish 2>

=back

=cut

use strict;
use warnings;

BEGIN {
  if ($0 =~ /([^(\/)]+)$/) {
    push (@INC, "$`lib/");
  }
}
require "RSA.lib";

################################################################
## Main package
package main;
{

  ################################################################
  ## Initialise parameters
  our $start_time = &RSAT::util::StartScript();
  our $program_version = do { my @r = (q$Revision: 1.48 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  #    $program_version = "0.00";

  #our %infile = ();
  #our %outfile = ();

  our $verbose = 0;
  our $in = \*STDIN;
  my $out = \*STDOUT;

  # command-line arguments
  %main::infile = ();
  #%main::outfile = ();
  our $databases = ""; # matrixes dbs
  our $genome = ""; # working genome version
  our $orgdb = ""; # alternative to databases
  our $report_net = 0; # boolean, 1 if network is provided
  #our $net_type = "not_grn"; # default for network type

  # other variables
  my $new_enhancer; # filehandle of filtered bed file
  my @genes_reg = (); # genes regulated
  my %nonRep_genes_reg; # hash of non-repetitive genes regulated
  my $val = 0; # values of has %nonRep_genes_reg
  my $i; # general counter variable

  my %nonRepTFs; # hash of non repetitive TFs
  my $report_tfs_noRegSeq = ""; # string with TFs not found on bed file
  my $report_tfs_RegSeq = ""; # string with TFs found on bed file

  my (@net_old_allgenes, @net_old_onlyTFs); # networks from input
  my %noRegSeq = (); # genes lacking regulatory sequence
  my $noRegSeq_reporting = ""; # string of genes lacking reg seq
  my %RegSeq = (); # genes with regulatory sequence on input
  my $RegSeq_reporting = ""; # string of genes with reg seq
  my $z = 0; # values of a hash
  my $flag = 0;
  my $ngenes = 0; # number of genes regulated
  my $ntfs = 0; # number of genes regulated

  my @matfiles_path = (); # rsat path to matrixes in use
  my @dbs = ();  # list of databases
  my @fields = (); # general var, list of fields from rows in a file
  my @nonRepTFs_k; # TFs list
  my $fasta; # filehandle of sequences
  my $new_fasta; # filehandle of sequences with headers
  my $cmd = ""; # general variable for &doit
  my $ACs = ""; # list of ACs from matrixes to use

  my (@net_new_allgenes, @net_new_onlyTFs); # networks
  my @seq_id; # seq_id splitted from matrix-scan results
  my %interaction_details;
  my @gene_name = ();
  my @deadend = (); # binary list with length of total TFs, 1 if the TF has no direct interactions
  my $matrixscan_re; # filehandle of matrix-scan results

  my $date = ""; # date for file writing
  my $count = 0;
  my %nonRepTFs_inv; # inverse hash of nonRepTFs

  my $direct; # filehandle of file with GRN direct interactions (main output)
  my $path = ""; # construction of paths in node

  ################################################################
  ## Read argument values
  &ReadArguments();

  ################################################################
  ## Check argument values
  unless ($main::infile{TFs}) {
    &RSAT::error::FatalError("File with the network's transcription factors must be provided\n It must be a one-column file with list of TFs");
  }
  unless ($main::infile{cres}) {
    &RSAT::error::FatalError("Bed file of enhancer sequences must be provided\n On the 4th column the gene it is referring to must be specified");
  }
  unless ($main::genome) {
    &RSAT::error::FatalError("Genome version must be specified with option -genome (e.g. mm9, hg19)");
  }
  if($report_net){
    unless ($main::infile{network}) {
      &RSAT::error::FatalError("Turned on network reporting (-rep_net) but no network (-net) provided");
    }
  }

  # Decide which transfac files to use
  # CODE: add all from organism option

  if($main::databases eq ""){ # default option
    #print("Entered default option of dbs\n");
    $matfiles_path[0] = "rsat/public_html/motif_databases/JASPAR/Jaspar_2018/nonredundant/JASPAR2018_CORE_vertebrates_non-redundant_pfms_transfac.tf";
    #print($matfiles_path[0]."\n");
  } else {
    if($main::databases eq "all"){ # CODE: falta anadir algo que soporte que meta los nombres de dbs como sea
      @dbs = qw(ENCODE epigram Hocomoco_human Hocomoco_mouse Homer hPDI HT_Selex Jolma_2013 Jolma_2015 NCAP-SELEX CAP-SELEX RSAT JASPAR cisBP Yeastract DrosophilaTFs);
    } else { @dbs = split(/ |,|;/, $main::databases) }

    # retrieve paths to databases from db_matrix_files.tab
    open(my $dbs_file, "<", "rsat/public_html/motif_databases/db_matrix_files.tab") or die "Cannot open file $!";
    $i = 0;
    while(my $row = <$dbs_file>){
      chomp($row);
      next if( $row =~ /^;/);
      next if( $row =~ /^#/);

      foreach my $db (@dbs){
        if( $row =~ /^$db/){
          @fields = split(/\t/,$row);
          $matfiles_path[$i++] = "rsat/public_html/motif_databases/".$fields[2];
          #print(" matrix file path number ".$c." : ".$matfiles_path[$c]."\n");
          #$c++;
        }
      }
    }
    close $dbs_file;
  }
#  print("# of paths = ".@matfiles_path."\n");

  $date = &AlphaDate();

  ################################################################
  ####################################### Save Input Data

  # Rewrite to bedfile without NAs or if no gene is refered to
  # Also, save the genes refered to in bed file of enhancers
  #        I want to preserve the order
  $val = 0; # first value is always 1 in hash
  $i = 1;

  my ($enhancer, $enh_dir) = &OpenInputFile($main::infile{cres});

  open( $new_enhancer, ">", "network-interactions_used_regulatory_regions_".$date.".bed") or die "Cannot open file $!";
  print $new_enhancer "# Bed file used for cis-regulatory regions\n";
  while(my $enh_row = <$enhancer>){
    chomp($enh_row);
    next unless ($enh_row =~ /\S/); ## Skip empty rows
    next if ($enh_row =~ /^;/); ## Skip comment rows
    next if ($enh_row =~ /^#/); ## Skip header rows

    if($enh_row =~ /^chr\w{1,2}\t\d+\t\d+\t(\w+).*/){
      if($1 ne "NA"){

          # write to new bed file and save order of genes
          print $new_enhancer $enh_row."\n";
          push(@genes_reg,$1);

          # add values in sequential order
          # gene is key, value is a number
          if( $val == 0 ){
            $nonRep_genes_reg{$1} = $val++;
          } elsif(!$nonRep_genes_reg{$1}) {
            $nonRep_genes_reg{$1} = $val++;
          }

      }
    } else {
      print(" You are missing a gene name in bed file at line: ".$i."\n");
      #&RSAT::error::FatalError("Error: not a fourth column of genes in file ".$main::cres."\n");
      #exit(1);
    }
    $i++;
  }

  close $enhancer;
  close $new_enhancer;

  &RSAT::error::FatalError("Error: not a single gene name was specified on bed file: ".$main::infile{cres}."\n") unless (scalar(@genes_reg) > 0);
  #print ("# of genes regulated = ".@genes_reg."\n");
  #print ("# of nonRep genes regulated = ".%nonRep_genes_reg."\n");

  #############################################################
  # Save non-repited TFs
  $i = 0;

  my ($TF, $TF_dir) = &OpenInputFile($main::infile{TFs});
  #open( my $TF,"<",$main::infile{TFs}) or die "Cannot open file $!";
  while( my $row = <$TF> ){
    next unless ($row =~ /\S/); ## Skip empty rows
    next if ($row =~ /^;/); ## Skip comment rows
    next if ($row =~ /^#/); ## Skip header rows
    chomp($row);
    #print("In TFs file: ".$row."\n");
    # distinguish if their sequences were reported
    if ( $nonRep_genes_reg{$row} ){
      #print("Tf in nonRep_genes_reg\n");
      # add values in sequential order, beginning in 1
      if( $i == 0 ){
        $nonRepTFs{$row} = $i++;
      } elsif(!$nonRepTFs{$row}) {
        $nonRepTFs{$row} = $i++;
      }

      $report_tfs_RegSeq .= $row.",";

    } else {
      $report_tfs_noRegSeq .= $row.",";
    }

  }
  #print("# of nonRepTFs = ".%nonRepTFs."\n");
  close $TF;

  # notice user about the lack of regulatory seqences
  print("\nSeems like you haven't reported the regulatory sequences for the following TFs : ".$report_tfs_noRegSeq."\n");
  if( $report_net){
    print(" The program will take into account the rest for further network comparision : ".$report_tfs_RegSeq."\n\n");
  }

  ## Check input TFs
  &RSAT::error::FatalError("Error: not a single TF was specified on: ".$main::infile{TFs}."\n") unless (scalar(%nonRepTFs) > 0);

  #############################################################
  # Save input network and create input GRN
  $flag = 0;
  $ngenes = %nonRep_genes_reg-1;
  $ntfs = %nonRepTFs -1;

#  if($report_net){

    #initialize matrixes to zeros
    # input all-genes net
    #print($val."\n");
    for $i (0..$ngenes){
      for my $j (0..$ngenes){
        $net_old_allgenes[$i][$j] = 0;
      }
    }
    # input only-TFs net
    for $i (0..$ntfs){
      for my $j (0..$ntfs){
        $net_old_onlyTFs[$i][$j] = 0;
      }
    }

    $i = 0;
    # Save input network interactions
    my ($net, $net_dir) = &OpenInputFile($main::infile{network});
    #open(my $net, "<", $main::network) or die "Cannot open file $!";
    while (my $net_row = <$net>){
      chomp($net_row);
      next unless ($net_row =~ /\S/); ## Skip empty rows
      next if ($net_row =~ /^;/); ## Skip comment rows
      next if ($net_row =~ /^#/); ## Skip header rows

      @fields = split(/\t/, $net_row);

      # make list of genes in network with regulatory sequences reported and not reported
      if($nonRep_genes_reg{$fields[0]}){
        $RegSeq{$fields[0]} = $z++;
        $flag++;
      } else {
        $noRegSeq{$fields[0]} = $z++;
      }
      if($nonRep_genes_reg{$fields[1]}){
        $RegSeq{$fields[1]} = $z++;
        $flag++;
      } else {
        $noRegSeq{$fields[1]} = $z++;
      }

      # en el reporte de comparacion de redes solo se tomaran en cuenta las interacciones en las que ambos genes tenian la sec reportada en -cre
      # if both genes have a reported regulatory sequence
      if( $flag == 2 ){
        # construct network of all genes
        $net_old_allgenes[ $nonRep_genes_reg{$fields[0]}-1 ][ $nonRep_genes_reg{$fields[1]}-1 ] = 1;
        # if the interaction is between TFs, add this one to the input's only-TFs network
        if( $nonRepTFs{$fields[0]} && $nonRepTFs{$fields[1]} ){
          $net_old_onlyTFs[ $nonRepTFs{$fields[0]}-1 ][ $nonRepTFs{$fields[1]}-1 ] = 1;
        }
      }
      # reinitialize value
      $flag =0;

    }
    close $net;

    # message if sequences of any genes in network were not reported
    if(%noRegSeq){
      foreach my $gene (sort keys %noRegSeq){
        $noRegSeq_reporting .= "".$gene.",";
        #print($gene."\n");
      }
      foreach my $gene (sort keys %RegSeq){
        $RegSeq_reporting .= "".$gene.",";
      }
      # Tell user which regulatory sequences are lacking and which genes are used in the program
      print("\nSeems like you haven't reported the regulatory sequences for the following genes in your input network: ".$noRegSeq_reporting."\n");
      print("The program will use the rest of the genes for further network comparison: ".$RegSeq_reporting."\n\n");
    }

  #} # report_net

  ################################################################
  ####################################### RSAT processing

  # Create the file with all the matrixes to use
  #   retrieve-matrix foreach database foreach TF
  @nonRepTFs_k = keys %nonRepTFs;

  foreach my $db (@matfiles_path){
    $ACs = &retrieveACsfromTFs($db, @nonRepTFs_k);
    $cmd = "retrieve-matrix -i ".$db." -id ".$ACs." >> network-interactions_used_matrixes_".$date.".tf \n";
    &doit($cmd);
  }

  # Retrieve sequences of regulatory elements
  print("\nRetrieving enhancer sequences ...");
  &doit("fetch-sequences -i network-interactions_used_regulatory_regions_".$date.".bed -genome ".$main::genome." -v 1  -header_format galaxy -o network-interactions_used_cre_seqs_".$date.".fa");
  print("\tDone!\n");

  # Add regulated genes names to fasta of regulatory sequences
  #     Im using 'external_id' not ensembl_id
  open( $fasta, "<", "network-interactions_used_cre_seqs_".$date.".fa") or die "Cannot open file $!";
  open( $new_fasta, ">", "network-interactions_used_cre_seqs_geneheader__".$date.".fa") or die "Cannot open file $!";

  $i = 0;
  while(my $fa_row = <$fasta> ){
    chomp($fa_row);

    if($fa_row =~ /^>/){
      print $new_fasta $fa_row."_*_".$genes_reg[$i++]."\n";# this must be at the identifier line e.g.: >seq1_*_$genes_reg[$i++]
                                                           #  only so that matrix-scan takes this whole name as seq_id
    } else {
      print $new_fasta $fa_row."\n";
    }
  }
  close $fasta;
  close $new_fasta;

  # Search motifs in all regulatory sequences
  print("\n matrix-scan running ...");
  $cmd = "matrix-scan -quick -v 1 -matrix_format transfac -m network-interactions_used_matrixes_".$date.".tf -bginput -markov 1";
  $cmd.= " -i network-interactions_used_cre_seqs_geneheader__".$date.".fa -o network-interactions_used_matrix-scan_results_".$date.".tsv";
  $cmd.= " -id_as_name -lth score 5";
  &doit($cmd);
  print("\tDone!\n");

  ################################################################
  ## Constuct networks and save matrix-scan details of interactions

  for $i (0..$ngenes){
    for my $j (0..$ngenes){
      $net_new_allgenes[$i][$j] = 0;
    }
  }
  # input only-TFs net
  for $i (0..$ntfs){
    for my $j (0..$ntfs){
      $net_new_onlyTFs[$i][$j] = 0;
    }
    # initialize to zeros
    $deadend[$i] = 0;
  }

  print("Network comparision running ...");
  # Create matrixes of networks:
  open( $matrixscan_re, "<", "network-interactions_used_matrix-scan_results_".$date.".tsv") or die "Cannot open file $!";
  #open( $matrixscan_re, "<", "network-interactions_used_matrix-scan_results_2019-12-17.133144.tsv") or die "Cannot open file $!";
  my $a = 1;
  while(my $results_row = <$matrixscan_re>){
    chomp($results_row);
    next unless ($results_row =~ /\S/); ## Skip empty rows
    next if ($results_row =~ /^;/); ## Skip comment rows
    next if ($results_row =~ /^#/); ## Skip header rows

    @fields = split(/\t/,$results_row); # $fields[2] is the TF
    @seq_id = split(/_/,$fields[0]); # >genome_chr_start_end_+_genename
    @gene_name = split(/_\*_/,$fields[0]);

    if($nonRep_genes_reg{$gene_name[1]} && $nonRep_genes_reg{$fields[2]}){ # no error
      if( !$net_new_allgenes[ $nonRep_genes_reg{$fields[2]}-1 ][ $nonRep_genes_reg{$gene_name[1]}-1 ] ){

          # set interaction in directed-network
          $net_new_allgenes[ $nonRep_genes_reg{$fields[2]}-1 ][ $nonRep_genes_reg{$gene_name[1]}-1] = 1;

          # if it is a TF-TF interaction, save it on the GRN
          if ($nonRepTFs{$fields[2]} && $nonRepTFs{$gene_name[1]}){
            $net_new_onlyTFs[ $nonRepTFs{$fields[2]}-1 ][ $nonRepTFs{$gene_name[1]}-1 ] = 1;
          }
      }
      $interaction_details{$fields[2]}{$gene_name[1]}{$seq_id[1].":".$seq_id[2]."-".$seq_id[3]}{$fields[3]."_".$fields[4]."_".$fields[5]}{$fields[7]} = $a++;

    }

  } #  while
  close $matrixscan_re;
  print("\t Done\n");

  ########################################################################
  ################### Comparisons and write-to-file reporting
  my %nonRep_genes_reg_inv;
  my $rowtofile;
  my $pos = "";
  my $scores = "";

  if( $report_net ){

    ## Comparison of complete gene networks
    my $interaction_gene_row = &OpenOutputFile("complete_direct_interactions_".$date.".tsv");
    &Verbose($interaction_gene_row) if ($main::verbose >= 1);
    print $interaction_gene_row "# Complete Network\n#TF\tGene\tCoordinates\tPositions\tScores\n"; # header
    my $intersection = &OpenOutputFile("network_intersection_".$date.".tsv");
    &Verbose($intersection) if ($main::verbose >= 1);
    print $intersection "# Interactions in common from:\n#\t".$main::infile{network}." and prueba_direct_interactions_complete.tsv\n#TFx (regulates) Geney\n"; # header
    my $rejected = &OpenOutputFile("network_rejected_interactions_".$date.".tsv");
    &Verbose($rejected) if ($main::verbose >= 1);
    print $rejected "# Interactions in:\t".$main::infile{network}." not found in prueba_direct_interactions_complete.tsv\n#Genex\tGeney\n"; # header
    my $found = &OpenOutputFile("network_found_interactions_".$date.".tsv");
    &Verbose($found) if ($main::verbose >= 1);
    print $found "# Newly found interactions in:\tprueba_direct_interactions_complete.tsv not found in ".$main::infile{network}."\n#TFx\tGeney\n"; # header

    %nonRep_genes_reg_inv = reverse %nonRep_genes_reg;
    my $tmp;

    for $i (0..$ngenes){
      for my $j (0..$ngenes){

          # write interaction to file according to the case: intersection, rejected or newly found
          if( $net_new_allgenes[$i][$j] ){

            $rowtofile = &writeRow( $nonRep_genes_reg_inv{ $i+1 }, $nonRep_genes_reg_inv{ $j+1 }, \%{ $interaction_details{$nonRep_genes_reg_inv{ $i+1 }}{$nonRep_genes_reg_inv{ $j+1 }} } );

            print $interaction_gene_row $rowtofile;

            if( $net_old_allgenes[$i][$j] ){
              # overlap (only interactions, not lack of them)
              print $intersection $rowtofile;
            } else {
              # newly found
              print $found $rowtofile;
            }

          } elsif ( $net_old_allgenes[$i][$j] ){
            # rejected (no details)
            print $rejected $nonRep_genes_reg_inv{ $i+1 }."\t".$nonRep_genes_reg_inv{ $j+1 }."\n";
          }

      } # for
    } # for

    close $interaction_gene_row;
    close $intersection;
    close $found;
    close $rejected;

    ## Comparison of GRNs ?
  } # report net

  ################################################################
  ## Report all direct interactions (main output)

  open( $direct, ">", "GRN_direct_interactions_".$date.".tsv") or die "Cannot open file $!";
  &Verbose($direct) if ($main::verbose >= 1);
  print $direct "#Regulator\tRegulated\tCoordinates\tPositions\tScore\n"; # header

  %nonRepTFs_inv = reverse %nonRepTFs;
  $count = 0;

  for my $i (0..$ntfs){
    for my $j (0..$ntfs){

      # write to file direct interactions
      if( $net_new_onlyTFs[$i][$j] ){

        $rowtofile = &writeRow( $nonRepTFs_inv{ $i+1 }, $nonRepTFs_inv{ $j+1 }, \%{ $interaction_details{$nonRepTFs_inv{ $i+1 }}{$nonRepTFs_inv{ $j+1 }} } );
        print $direct $rowtofile;

      } else {
        # add node to list of deadend nodes
        if($count == $ntfs){
          $deadend[$i] = 1;
        }
        $count++;
      }

    } # for
  } # for

  close $direct;

  ################################################################
  ##  CASES OF DIRECT AND INDIRECT INTERACTIONS SIMULTANEOUSLY
  # E.g.:
  #   IN GRN_direct_interactions.tsv:
  # TF1 TF2
  # TF2 TF3
  # TF1 TF3
  #   IN GRN_indirect_interactions.tsv_:
  # TF1 ==> TF2 ==> TF3
  # TF2 ==> TF3
  # TF1 ==> TF3
  # Interpretation: TF1 regulates directly and indirectly TF3.

  $path = "\n"; # string to concat paths of 3 nodes
  my $details1 = "";
  my $details2 = "";
  my $coo = "";

  ## Report all paths of three (instead of cases of direct and indirect interactions simoultaneously)
  open( my $indirect, ">", "GRN_indirect_interactions_".$date.".tsv") or die "Cannot open file $!";
  &Verbose($indirect) if ($main::verbose >= 1);
  print $indirect "# Report of 3-nodes direct interactions\n# cases of no direct interactions are specified.\n#node1 ==> node2 ==> node3\n\n"; #header

  for my $row (0..$ntfs){
    #$path = "";
    # check if the node is a deadend
    if ( !$deadend[$row] ){
      for my $con1 (0..$ntfs){
        # find direct connections
        if( $net_new_onlyTFs[$row][$con1] ){

          $details1 = &writeRow( $nonRepTFs_inv{ $row+1 }, $nonRepTFs_inv{ $con1+1 }, \%{ $interaction_details{$nonRepTFs_inv{ $row+1 }}{$nonRepTFs_inv{ $con1+1 }} } );

          # construct path
          $path .= $nonRepTFs_inv{ $row+1 }."\t==>\t".$nonRepTFs_inv{ $con1+1 };
          # report if its a connection to itself
          if ($row == $con1){
            print $indirect $path."\n";
            print $indirect $details1."\n";
          } elsif ( !$deadend[$con1] ){

            for my $con2 (0..$ntfs){
              # skip it its a connection to itself
              if( $con1 == $con2){ next; }
              if( $net_new_onlyTFs[$con1][$con2] ){

                $details2 = &writeRow( $nonRepTFs_inv{ $con1+1 }, $nonRepTFs_inv{ $con2+1 }, \%{ $interaction_details{$nonRepTFs_inv{ $con1+1 }}{$nonRepTFs_inv{ $con2+1 }} } );

                # report 3-node connection to file
                $path .= "\t==>\t".$nonRepTFs_inv{ $con2+1 };
                print $indirect $path."\n";
                print $indirect $details1."==>\n".$details2."\n";
                # set the 2-node connection for more 3-node connections
                $path = "\n".$nonRepTFs_inv{ $row+1 }."\t==>\t".$nonRepTFs_inv{ $con1+1 };
              } # direct connection 2 found
            }

          } else {
            # if it was a dead end report 2-node connection
            print $indirect "".$path."\n";
            print $indirect $details1."\n";
          }
          $path = "\n";

        } # direct connection 1 found
      } # for connection 1
    } else {
      print $indirect "".$nonRepTFs_inv{ $row+1 }." has no targets.\n";
      $path = "\n";
    }

  } # for row

  close $indirect;

  ## Print verbose
  #&Verbose() if ($main::verbose >= 1);

  #&close_and_quit();

} # main

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################
=pod
################################################################
## Close output file and quit
sub close_and_quit {

  ## Report execution time
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be exectuted by all scripts
#  print $main::out $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
# cambiar a los nombres de los filehandles de output aqui

  ## CLOSE OTHER FILES HERE IF REQUIRED
  #if ($outfile{output}) {
  #  close $main::out;
  #  &RSAT::message::TimeWarn("Output file", $outfile{output}) if ($main::verbose >= 2);
  #}

  exit(0);
}
=cut
################################################################
## Display full help message
sub PrintHelp {
  system "pod2text -c $0";
  exit()
}

## Display short help message
sub PrintOptions {
  &PrintHelp();
}

################################################################
## Read arguments
sub ReadArguments {
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);
=pod
=head1 OPTIONS
=over 4

=item B<-v #>
Level of verbosity.
=cut
    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
          $main::verbose = shift(@arguments);
      } else {
          $main::verbose = 1;
      }
=item B<-h>
Display full help message.
=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();
=item B<-help>
Same as -h.
=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();
    }# elsif ($arg eq "-i") {
      #$main::infile{input} = shift(@arguments);
    #}
=item B<-tfs TFs_infile>
Mandaroty.
One-column input file with list of TFs in network.
=cut
    elsif ($arg eq "-tfs") {
      $main::infile{TFs} = shift(@arguments);
=item B<-cre cre_infile>
Mandaroty.
Bed file with of regulatory sequences per gene in network.
Each sequence must refer to its regulated-gene on the 4th column.
=cut
    } elsif ($arg eq "-cre") {
      $main::infile{cres} = shift(@arguments);
=item B<-genome genome_version>
Mandaroty.
Working genome version.
=cut
    } elsif ($arg eq "-genome") {
      $main::genome = shift(@arguments);
=item B<-db databases>
Database(s) to use separated by commas.
=cut
    } elsif ($arg eq "-db") {
      $main::databases = shift(@arguments);
=item B<-net network_infile>
Mandaroty if -report_net is set to 1.
The network must be a two-column file with each row containing a single interaction (direct or indirect).
=cut
    } elsif ($arg eq "-net") {
      $main::infile{network} = shift(@arguments);
=item B<-report_net>
Report network,
  i.e., differences and overlap between the input network and the new network are outputted
=cut
    } elsif ($arg eq "-report_net") {
      $main::report_net = 1;
#=item B<-net_type grn|not_grn>
#Network type specification. Could be GRN (Gene Regulatory Network) containing only TFs,
#of not_grn containing both genes codifying for TFs and genes codifying for not TFs.
#=cut
    } #elsif ($arg eq "-net_type") {
      #$main::net_type = shift(@arguments);
    #}# elsif ($arg eq "-o") {
      #$outfile{output} = shift(@arguments);
    #}
    else {
      &FatalError(join("\t", "Invalid option", $arg));
    }
  } # while

=pod
=back
=cut

} # sub ReadArguments


################################################################
## Verbose message
sub Verbose {
    my($out) = @_;

    print $out "; network-interactions ";
    &PrintArguments($out);
    #printf $out "; %-22s\t%s\n", "Program version", $program_version;
    if (%main::infile) {
        print $out "; Input files\n";
        while (my ($key,$value) = each %main::infile) {
            printf $out ";\t%-13s\t%s\n", $key, $value;
        }
    }
    #if (%main::outfile) {
    #    print $out "; Output files\n";
    #    while (my ($key,$value) = each %outfile) {
    #        printf $out ";\t%-13s\t%s\n", $key, $value;
    #    }
    #}
}


################################################################
## Obtain list of the accesions from specified TFs to call retrieve-matrix on
sub retrieveACsfromTFs {
    my ($database, @TFnames) = @_;

    # Check in which field the TF name appears
    my $isinAC = 0; # boolean to check if TFname is in AC field or ID
    my @cols; # list of elementes in field AC
    my $retrievingACs = ""; # final "list" with all AC's from the TFnames required

    open( my $db, "<", $database) or die "Cannot open file $!";
    while(my $db_row = <$db>){
        chomp($db_row);
        next unless($db_row =~ /^AC/ || $db_row =~ /^ID/);
        if( $db_row =~ /^AC\s+[^MA{0,1}\d{4,5}(_\d){0,1}(\.\d+){0,1}]/){ # si encuentra algo distinto de MA..
            $isinAC = 1;
            last;
        } elsif( $db_row =~ /^ID\s+[^MA{0,1}\d{4,5}(_\d){0,1}(\.\d+){0,1}]/){
            $isinAC = 0;
            last;
        } # else, no TFname provided
    } #while
    close $db;

    # Generate array of ACs
    open( $db, "<", $database) or die "Cannot open file $!";
    while(my $db_row = <$db>){ # does this continues from the line before or does it starts over again
        chomp($db_row);
        next unless($db_row =~ /^AC/ || $db_row =~ /^ID/);

        if( $db_row =~ /^AC/){
            @cols = split(/\s+/,$db_row); # guarda el AC para cada TF
        }
        foreach my $name (@TFnames){
            if( $db_row =~ /.*$name.*/){
              if( !$isinAC && $db_row =~ /^AC/){ # if TF name is found in AC field
                $retrievingACs .= $cols[1].",";
              } elsif( $isinAC && $db_row =~ /^ID/){ # if TF is found in ID field
                $retrievingACs .= $cols[1].",";
              }
            } # if
        } # foreach
    } # while
    close $db;

    return $retrievingACs;
} # sub

sub writeRow {
  my ( $TF, $target, $interaction_details ) = @_;
  my ($rowtofile, $pos, $scores, $i);
  $rowtofile = "";

  foreach my $coordinates ( keys %{ $interaction_details }){
    # report direct interaction TFx Geney
    $rowtofile .= $TF."\t".$target."\t".$coordinates;  #    TFx (regulates) Genex (in) Coordinates

    $pos = "";
    $scores = "";
    foreach my $positions ( keys %{ $interaction_details->{$coordinates} } ){
      if ($pos eq ""){
        $pos = $positions."";
        $i=0;
        foreach my $s ( keys %{ $interaction_details->{$coordinates}->{$positions} } ) {
          if(!$i){
            $scores = $s."";
            $i++;
          } else {
            $scores .= "_".$s;
          }
        } # foreach score
      } else {
        $pos .= ";".$positions;
        $i=0;
        foreach my $s ( keys %{ $interaction_details->{$coordinates}->{$positions} } ) {
          if(!$i){
            $scores .= ";".$s;
            $i++;
          } else {
            $scores .= "_".$s;
          }
        } # foreach score
      }
    } # foreach position
    $rowtofile .= "\t".$pos."\t".$scores."\n";
  }

  return $rowtofile;
}
