#!/usr/bin/perl -w
use strict;

################################################################
## This script creates a report about a set of analyzed regulons
##
## Tested with some maize clusters from http://www.ncbi.nlm.nih.gov/pubmed/25918418
## Bruno Contreras Moreira, Jaime Castro Mondragon, Jacques Van Helden

## parameters
my $RNDSAMPLES =50;   # number of random clusters (random$reg-1 .. random$reg-N) compared to each regulon
my $MAXSIG     =10; #20  cap applied to peak-motifs oligo/dyad significance (also y-axis limit)
my $MAXSIGMEME =50;   # cap applied to MEME -log10(E-value) (also y-axis limit)
my $MAXSIGGO   =30;   # cap applied to GO enrichment significance (also y-axis limit)
my $MINCOR     =0.7;  # min value of column 'cor' for a footprintDB match to be considered
my $MINNCOR    =0.5;  # min value of column 'Ncor' for a footprintDB match to be considered

my $CEX        =2;    # text magnification passed to the R plots (cex, cex.lab, cex.axis)
my $RIMGWIDTH  =400;  # width passed to R png()
my $REPIMGWIDTH=100;  # width of the <img> thumbnails written to the report

# input folders, one per regulon, previously processed with make -f regulons.mk
my @REGULONS= ( 1 .. 14 ); 

my $ORTHSPECIES = 'Brachypodium_distachyon.v1.0.29'; # only echoed in the log and in the report footer
my $EXPRDIR = 'expression_plots/';                   # folder with one expression plot ($reg.png) per regulon

# files and folders copied to <output folder>/source at the end of the run
my @sourcefiles = qw( HOWTO.txt report.pl go.mk meme-regulons.mk peak-regulons.mk regulons.mk );
my @sourcedirs  = @REGULONS;

# shell arguments appended to every 'R --no-save' call
# NOTE(review): with this redirection order only R's stdout goes to /dev/null,
# while its stderr is sent to this script's stdout; use '> /dev/null 2>&1'
# instead if both streams should be silenced - confirm intent
my $RVERBOSE = 0;
my $Rparams = '';
if(!$RVERBOSE){ $Rparams = '-q 2>&1 > /dev/null' }

my $outdir = $ARGV[0] || die "# usage: $0 <output folder>\n";

# stop right away if the output folder cannot be created, otherwise every
# later write would fail silently
if(! -e $outdir)
{
    mkdir($outdir) || die "# $0 : cannot create output folder $outdir: $!\n";
}

print "# output folder : $outdir\n";
print "# EXPRDIR = $EXPRDIR\n";
print "# ORTHSPECIES=$ORTHSPECIES\n";
print "# RNDSAMPLES=$RNDSAMPLES MAXSIGGO=$MAXSIGGO MAXSIGMEME=$MAXSIGMEME MAXSIG=$MAXSIG MINCOR=$MINCOR MINNCOR=$MINNCOR\n";

# tab-separated report, one row per regulon; it is converted to HTML at the end
open(REPORT,'>',$outdir.'/report.tab') || die "# $0 : cannot create $outdir/report.tab: $!\n";

#print REPORT "# regulon\tpromoters\tGO enrichment\tMEME\tMEME-logo\tMEME-orth\tMEME-footDB\tpeaks-oligo\tpeaks-dyad\tpeaks-logo\tpeaks-orth\tpeaks-footDB\n";
#print REPORT "# cluster\tGO enrichment\tMEME\tMEME-orth\tMEME-footDB\tpeaks-oligo\tpeaks-dyad\tpeaks-orth\tpeaks-footDB\n";
print REPORT "# name\tGO enrichment\tMEME\tpeaks-oligo\tpeaks-dyad\tpeaks-orth\tfootprintDB\n";

# Main loop: writes one row of report.tab per regulon folder. For each
# regulon the score of the real cluster is compared to the scores of
# $RNDSAMPLES random clusters and plotted with R as a bar plot in which the
# real cluster is drawn in black. The parsing, sorting and plotting steps
# shared by all sections live in the private helpers defined after the loop.
foreach my $reg (@REGULONS)
{
    ##1) GO analysis

    ##2) MEME:
    ##2.1) Evalue of regulon vs random
    ##2.2) sig of motif vs footDB
    ##2.3) sig of orthologues.scan.tab from maize and sorghum

    ##3) peak-motifs
    ##3.1) oligo sig of regulon vs random
    ##3.2) dyad sig of regulon vs random
    ##3.3) sig of motif vs footDB
    ##3.4) sig of orthologues.scan.tab from maize and sorghum

    print "\n# Making report of $reg\n";

    # print cluster name, contents and plot of expression profile
    my $exprplot = "$outdir/regulon$reg.expression.png";
    system("cp $EXPRDIR/$reg.png $exprplot");
    print REPORT "<b>cluster $reg</b><br><a href='./regulon$reg.expression.png'><img width='$REPIMGWIDTH' src='./regulon$reg.expression.png'></a><br>";

    # split(' ') skips the leading blanks that some wc implementations print
    my ($nseqs) = split(' ',`wc -l $reg/regulon$reg.txt`);
    printf(REPORT "sequences: %d",$nseqs || 0);

    ### GO analysis
    my $sorted_report = "$outdir/regulon$reg.go.report.tab";
    my $report = "$outdir/regulon$reg.go.report.raw.tab";
    open(my $gorept,'>',$report) || die "# $0 : cannot create $report: $!\n";

    my ($best_sig,$best_GO,$go_name) = _first_GO_hit("$reg/regulon$reg.go.tab");

    # link to the GO term plus its name, shortened to the first three words
    my $best_annot = '';
    if($best_GO ne 'NA')
    {
        my @words = split(/\s+/,$go_name);
        my $formatted_annot = (scalar(@words) > 3) ? join(' ',@words[0 .. 2]).'..' : $go_name;
        $best_annot = "<a href=\"http://amigo.geneontology.org/amigo/term/$best_GO\">$best_GO</a><br>$formatted_annot";
    }

    print {$gorept} "$reg\t$best_sig\t$best_GO\n";

    foreach my $rnd (1 .. $RNDSAMPLES)
    {
        my ($rnd_sig,$rnd_GO) = _first_GO_hit("$reg/random$reg-$rnd.go.tab");

        # random rows must be labelled 'rnd-N', as _sort_and_rank expects
        print {$gorept} "rnd-$rnd\t$rnd_sig\t$rnd_GO\n";
    }

    close($gorept);

    my $regulon_idx = _sort_and_rank($report,$sorted_report,2);
    my $colors = get_Rplot_colors($regulon_idx,$RNDSAMPLES+1);
    _barplot_R("$outdir/regulon$reg.go.report.png",$sorted_report,2,
        "max significance",$MAXSIGGO,$colors,$reg,$CEX);

    print REPORT "\t<a href='./regulon$reg.go.report.png'><img width='$REPIMGWIDTH' src='./regulon$reg.go.report.png'></a>";
    print REPORT "<br>$best_annot";

    ## MEME analysis
    $sorted_report = "$outdir/regulon$reg.meme.report.tab";
    $report = "$outdir/regulon$reg.meme.report.raw.tab";
    open(my $memerept,'>',$report) || die "# $0 : cannot create $report: $!\n";

    print {$memerept} "$reg\t"._meme_sig("$reg/regulon$reg.rm.meme.fna.meme-rm/meme.txt")."\n";

    foreach my $rnd (1 .. $RNDSAMPLES)
    {
        print {$memerept} "rnd-$rnd\t"._meme_sig("$reg/random$reg-$rnd.rm.meme.fna.meme-rm/meme.txt")."\n";
    }

    close($memerept);

    $regulon_idx = _sort_and_rank($report,$sorted_report,2);
    $colors = get_Rplot_colors($regulon_idx,$RNDSAMPLES+1);
    _barplot_R("$outdir/regulon$reg.meme.report.png",$sorted_report,2,
        "max significance",$MAXSIGMEME,$colors,$reg,$CEX);

    #print REPORT "\t<a href='./regulon$reg.meme.report.png'><img width='$REPIMGWIDTH' src='./regulon$reg.meme.report.png'></a>\t";
    print REPORT "\t<a href='./regulon$reg.meme.report.png'><img width='$REPIMGWIDTH' src='./regulon$reg.meme.report.png'></a>";

    # MEME logos are currently not added to the report (code kept for reference)
    # NOTE(review): this folder name lacks the '.meme' infix used by the other
    # MEME paths in this loop - confirm which one is right before re-enabling
    if(opendir(my $memedir,"$reg/regulon$reg.rm.fna.meme-rm/"))
    {
        foreach my $logofile (grep {/logo\d+.png/} readdir($memedir))
        {
            #system("cp $reg/regulon$reg.rm.fna.meme-rm/$logofile $outdir/$reg.meme.$logofile");
            #print REPORT "<a href='$reg.meme.$logofile'><img width='$REPIMGWIDTH' src='./$reg.meme.$logofile'></a><br>";
        }
        closedir($memedir);
    }

    ## meme orths mapping (the plot is produced but not linked in the report)
    _run_R(<<EOR);
    png("$outdir/regulon$reg.meme.orth.report.png",width=$RIMGWIDTH);
    data = read.csv(file="$reg/regulon$reg.rm.meme.fna.meme-rm/orthologues.scan.tab",comment.char=";",sep="\\t");
    shuf = read.csv(file="$reg/regulon$reg.rm.meme.fna.meme-rm/orthologues_perm1.scan.tab",comment.char=";",sep="\\t");

    boxplot( data\$sig, shuf\$sig, col=c('grey20','grey'), 
        varwidth=T, ylab="motif scan significance including orthologues",
        names=c("$reg meme motifs","$reg shuffled motifs"))
    #legend("topright", inset=-0.05, c("$reg meme motifs","$reg shuffled motifs"),
    #    fill=c('grey20','grey'), cex=1.0, horiz=T)
    dev.off()
    q()
EOR

    #printf REPORT ("\t<a href='./regulon$reg.meme.orth.report.png'><img width='%d' src='./regulon$reg.meme.orth.report.png'></a>",$REPIMGWIDTH);
    #system("cp $reg/regulon$reg.rm.fna.meme-rm/orthologues.scan.png $outdir/regulon$reg.rm.fna.meme-rm.orthologues.scan.png");
    #system("cp $reg/regulon$reg.rm.fna.meme-rm/orthologues_perm1.scan.png $outdir/regulon$reg.rm.fna.meme-rm.orthologues_perm1.scan.png");
    #print REPORT "<br><a href='regulon$reg.rm.fna.meme-rm.orthologues.scan.png'>map</a> | ";
    #print REPORT "<a href='regulon$reg.rm.fna.meme-rm.orthologues_perm1.scan.png'>shuffled</a>";

    ## meme motifs footDB scan (the plot is produced but not linked in the report)
    $sorted_report = "$outdir/regulon$reg.meme.footDB.report.tab";
    $report = "$outdir/regulon$reg.meme.footDB.report.raw.tab";
    open(my $memefootdb,'>',$report) || die "# $0 : cannot create $report: $!\n";

    # for MEME motifs the first match passing both thresholds is taken
    print {$memefootdb} "$reg\t"._best_Ncor("$reg/regulon$reg.rm.meme.fna.meme-rm/footDB.tab",1)."\n";

    foreach my $rnd (1 .. $RNDSAMPLES)
    {
        print {$memefootdb} "rnd-$rnd\t"._best_Ncor("$reg/random$reg-$rnd.rm.meme.fna.meme-rm/footDB.tab",1)."\n";
    }

    close($memefootdb);

    $regulon_idx = _sort_and_rank($report,$sorted_report,2);
    $colors = get_Rplot_colors($regulon_idx,$RNDSAMPLES+1);
    _barplot_R("$outdir/regulon$reg.meme.footDB.report.png",$sorted_report,2,
        "max Ncor in footprintDB",1,$colors,$reg,$CEX);

    #print REPORT "\t<a href='./regulon$reg.meme.footDB.report.png'><img width='$REPIMGWIDTH' src='./regulon$reg.meme.footDB.report.png'></a>";
    #mkdir("$outdir/$reg.meme.footDB");
    #system("cp -r $reg/regulon$reg.rm.fna.meme-rm/footDB* $outdir/$reg.meme.footDB");
    #print REPORT "<br><a href='$reg.meme.footDB/footDB_index.html'>summary</a>";

    ## peak-motifs analysis
    $sorted_report = "$outdir/regulon$reg.peaks.report.tab";
    $report = "$outdir/regulon$reg.peaks.report.raw.tab";
    open(my $peakrept,'>',$report) || die "# $0 : cannot create $report: $!\n";

    # result tables, relative to each peak-motifs output folder
    my $oligo_tab = 'results/oligos_5-7nt/peaks_oligos-2str-noov_5-7nt_mvk2_2.tab';
    my $dyad_tab  = 'results/dyads_test_vs_ctrl/peaks_dyads-2str-noov_3nt_sp0-20_bg_monads.tab';

    my ($oligo_sig,$best_oligo) = _best_word_sig("$reg/regulon$reg.rm.fna.peaks-rm/$oligo_tab");
    my ($dyad_sig,$best_dyad)   = _best_word_sig("$reg/regulon$reg.rm.fna.peaks-rm/$dyad_tab");

    # raw table columns: name, oligo significance, dyad significance
    print {$peakrept} "$reg\t$oligo_sig\t$dyad_sig\n";

    foreach my $rnd (1 .. $RNDSAMPLES)
    {
        my ($rnd_oligo_sig) = _best_word_sig("$reg/random$reg-$rnd.rm.fna.peaks-rm/$oligo_tab");
        my ($rnd_dyad_sig)  = _best_word_sig("$reg/random$reg-$rnd.rm.fna.peaks-rm/$dyad_tab");

        print {$peakrept} "rnd-$rnd\t$rnd_oligo_sig\t$rnd_dyad_sig\n";
    }

    close($peakrept);

    # oligos: raw table sorted by column 2
    $regulon_idx = _sort_and_rank($report,$sorted_report,2);
    $colors = get_Rplot_colors($regulon_idx,$RNDSAMPLES+1);
    _barplot_R("$outdir/regulon$reg.peaks.oligos.report.png",$sorted_report,2,
        "max significance",$MAXSIG,$colors,$reg,$CEX);

    print REPORT "\t<a href='./regulon$reg.peaks.oligos.report.png'><img width='$REPIMGWIDTH' ".
        "src='./regulon$reg.peaks.oligos.report.png'></a>";
    print REPORT "<br>$best_oligo";

    # dyads: same raw table sorted by column 3, the sorted file is overwritten
    $regulon_idx = _sort_and_rank($report,$sorted_report,3);
    $colors = get_Rplot_colors($regulon_idx,$RNDSAMPLES+1);
    _barplot_R("$outdir/regulon$reg.peaks.dyads.report.png",$sorted_report,3,
        "max significance",$MAXSIG,$colors,$reg,$CEX);

    #print REPORT "\t<a href='./regulon$reg.peaks.dyads.report.png'><img width='$REPIMGWIDTH' src='./regulon$reg.peaks.dyads.report.png'></a>\t";
    print REPORT "\t<a href='./regulon$reg.peaks.dyads.report.png'><img width='$REPIMGWIDTH' ".
        "src='./regulon$reg.peaks.dyads.report.png'></a>";
    print REPORT "<br>$best_dyad";

    # peak-motifs logos are currently not added to the report (code kept for reference)
    if(opendir(my $peaksdir,"$reg/regulon$reg.rm.fna.peaks-rm/results/discovered_motifs"))
    {
        my $peaklogos = 0;
        foreach my $motifdir (grep {/_m\d+$/} readdir($peaksdir))
        {
            $peaklogos++;
            #my $logofile = "$reg/regulon$reg.rm.fna.peaks-rm/results/discovered_motifs/$motifdir/peaks_$motifdir\_logo.png";
            #system("cp $logofile $outdir/$reg.peaks.logo$peaklogos\.png");
            #print REPORT "<a href='./$reg.peaks.logo$peaklogos\.png'><img width='$REPIMGWIDTH' src='./$reg.peaks.logo$peaklogos\.png'></a><br>";
        }
        closedir($peaksdir);
    }

    ## peak-motifs orths mapping
    _run_R(<<EOR);
    png("$outdir/regulon$reg.peaks.orth.report.png",width=$RIMGWIDTH);
    data = read.csv(file="$reg/regulon$reg.rm.fna.peaks-rm/orthologues.scan.tab",comment.char=";",sep="\\t");
    shuf = read.csv(file="$reg/regulon$reg.rm.fna.peaks-rm/orthologues_perm1.scan.tab",comment.char=";",sep="\\t");

    boxplot( data\$sig, shuf\$sig, col=c('grey20','grey'), 
        varwidth=T, ylab="motif scan significance including orthologues",
        names=c("$reg peak-motifs","$reg shuffled motifs"), cex.lab=$CEX, cex.axis=$CEX)
    #legend("topright", inset=-0.05, c("$reg peak-motifs","$reg shuffled motifs"),
    #    fill=c('grey20','grey'), cex=1.0, horiz=T)
    dev.off()
    q()
EOR

    printf REPORT ("\t<a href='./regulon$reg.peaks.orth.report.png'><img width='%d' src='./regulon$reg.peaks.orth.report.png'></a>",$REPIMGWIDTH);
    system("cp $reg/regulon$reg.rm.fna.peaks-rm/orthologues.scan.png $outdir/regulon$reg.rm.fna.peaks-rm.orthologues.scan.png");
    system("cp $reg/regulon$reg.rm.fna.peaks-rm/orthologues_perm1.scan.png $outdir/regulon$reg.rm.fna.peaks-rm.orthologues_perm1.scan.png");
    print REPORT "<br><a href='regulon$reg.rm.fna.peaks-rm.orthologues.scan.png'>map</a> | ";
    print REPORT "<a href='regulon$reg.rm.fna.peaks-rm.orthologues_perm1.scan.png'>shuffled</a>";

    ## peak-motifs footDB scan
    $sorted_report = "$outdir/regulon$reg.peaks.footDB.report.tab";
    $report = "$outdir/regulon$reg.peaks.footDB.report.raw.tab";
    open(my $peakfootdb,'>',$report) || die "# $0 : cannot create $report: $!\n";

    # for peak-motifs the highest Ncor among all matches passing both thresholds is taken
    my $footdb_tab = 'results/discovered_vs_db/peaks_motifs_vs_db_footDB.tab';
    print {$peakfootdb} "$reg\t"._best_Ncor("$reg/regulon$reg.rm.fna.peaks-rm/$footdb_tab",0)."\n";

    foreach my $rnd (1 .. $RNDSAMPLES)
    {
        print {$peakfootdb} "rnd-$rnd\t"._best_Ncor("$reg/random$reg-$rnd.rm.fna.peaks-rm/$footdb_tab",0)."\n";
    }

    close($peakfootdb);

    $regulon_idx = _sort_and_rank($report,$sorted_report,2);
    $colors = get_Rplot_colors($regulon_idx,$RNDSAMPLES+1);
    _barplot_R("$outdir/regulon$reg.peaks.footDB.report.png",$sorted_report,2,
        "max Ncor in footprintDB",1,$colors,$reg,"$CEX/1.5");

    print REPORT "\t<a href='./regulon$reg.peaks.footDB.report.png'><img width='$REPIMGWIDTH' src='./regulon$reg.peaks.footDB.report.png'></a>";
    system("cp -r $reg/regulon$reg.rm.fna.peaks-rm/results/discovered_vs_db/ $outdir/$reg.peaks.discovered_vs_db");
    print REPORT "<br><a href='$reg.peaks.discovered_vs_db/peaks_motifs_vs_db_footDB_alignments_1ton.html'>align</a> | ";
    print REPORT "<a href='$reg.peaks.discovered_vs_db/peaks_motifs_vs_db_footDB_index.html'>summary</a>";

    # end report line
    print REPORT "\n";
}

# Private helpers used by the loop above

# Reads the first non-comment row of a tab-separated GO enrichment table.
# Returns (significance, GO id, GO term name), where significance is taken
# from column 14 and capped to $MAXSIGGO; returns (0,'NA','') when the file
# cannot be read or contains no data row.
sub _first_GO_hit
{
    my ($file) = @_;
    my ($sig,$term,$name) = (0,'NA','');

    if(open(my $fh,'<',$file))
    {
        while(my $line = <$fh>)
        {
            #GO:0043530 GO:0043530 adenosine... 1 1 1 1 00 24111 1.00000 4.1e-05 0.27327 0.56 1
            next if($line =~ /^#/);
            chomp($line); # otherwise a newline could end up inside the raw table
            my @data = split(/\t/,$line);
            ($term,$name,$sig) = @data[0,2,13];
            last;
        }
        close($fh);
    }
    else{ warn "# cannot find $file\n" }

    # incomplete rows fall back to the defaults
    $term = 'NA' if(!defined($term) || $term eq '');
    $name = ''   if(!defined($name));
    $sig  = 0    if(!defined($sig) || $sig eq '');

    if($sig > $MAXSIGGO){ $sig = $MAXSIGGO }
    return ($sig,$term,$name);
}

# Returns -log10(E-value) of the first 'MOTIF  1' line of a MEME text
# output, capped to $MAXSIGMEME; returns 0 when the file cannot be read or
# no such line is found.
sub _meme_sig
{
    my ($file) = @_;
    my $sig = 0;

    if(open(my $fh,'<',$file))
    {
        while(my $line = <$fh>)
        {
            #MOTIF  1 MEME   width =  15  sites =  15  llr = 196  E-value = 1.2e-005
            if($line =~ /MOTIF  1/ && $line =~ /E-value = (\S+)/)
            {
                my $evalue = $1;
                # log(0) is a fatal error in Perl, so an E-value of 0 gets the cap
                $sig = ($evalue > 0) ? -log($evalue)/log(10) : $MAXSIGMEME;
                last;
            }
        }
        close($fh);
    }
    else{ warn "# cannot find $file\n" }

    if($sig > $MAXSIGMEME){ $sig = $MAXSIGMEME }
    return $sig;
}

# Scans a peak-motifs oligo/dyad table and returns (significance, identifier)
# of the row with the highest value in column 8 (occ_sig), with the
# significance capped to $MAXSIG; returns (0,'') when no row scores above 0.
# The cap is applied after the scan so that the identifier always belongs to
# the most significant row.
sub _best_word_sig
{
    my ($file) = @_;
    my ($max_sig,$best_id) = (0,'');

    if(open(my $fh,'<',$file))
    {
        while(my $line = <$fh>)
        {
            #seq    identifier      exp_freq        occ     exp_occ occ_P   occ_E   occ_sig rank    ovl_occ forbocc
            #caggcag caggcag|ctgcctg 0.0002062705470 13      3.37    5.2e-05 4.2e-01 0.37    1       1       78
            next if($line =~ /^[;#]/);
            chomp($line);
            my @data = split(/\t/,$line);
            next if(scalar(@data) < 8); # skip blank or truncated rows
            if($data[7] > $max_sig){ ($max_sig,$best_id) = ($data[7],$data[1]) }
        }
        close($fh);
    }
    else{ warn "# cannot find $file\n" }

    if($max_sig > $MAXSIG){ $max_sig = $MAXSIG }
    return ($max_sig,$best_id);
}

# Returns the Ncor (column 6) of a motif comparison table, considering only
# rows with cor >= $MINCOR and Ncor >= $MINNCOR. With a true $first_only the
# first such row is returned, otherwise the highest Ncor among them;
# returns 0 when no row qualifies or the file cannot be read.
sub _best_Ncor
{
    my ($file,$first_only) = @_;
    my $best = 0;

    if(open(my $fh,'<',$file))
    {
        while(my $line = <$fh>)
        {
            #id1    id2 name1   name2   cor Ncor    logoDP  NIcor   NSW SSD NsEucl  w1  w2  w   W   Wr  wr1 wr2
            next if($line =~ /^[;#]/);
            chomp($line);
            my @data = split(/\t/,$line);
            next if(scalar(@data) < 6); # skip blank or truncated rows
            next if($data[4] < $MINCOR || $data[5] < $MINNCOR);
            if($first_only){ $best = $data[5]; last }
            if($data[5] > $best){ $best = $data[5] }
        }
        close($fh);
    }
    else{ warn "# cannot find $file\n" }

    return $best;
}

# Sorts the raw table $raw numerically, in decreasing order of column
# $column, into $sorted (stable sort, so ties keep the order in which the rows
# were written) and returns the 0-based row of the first line that is not
# labelled 'rnd-N', or -1 if there is no such line.
sub _sort_and_rank
{
    my ($raw,$sorted,$column) = @_;

    system("sort -s -k$column,${column}nr $raw > $sorted");

    my ($idx,$found) = (-1,0);
    if(open(my $fh,'<',$sorted))
    {
        while(my $line = <$fh>)
        {
            $idx++;
            next if($line =~ /^rnd-\d+/);
            $found = 1;
            last;
        }
        close($fh);
    }
    else{ warn "# cannot read $sorted\n" }

    return $found ? $idx : -1;
}

# Pipes an R script to 'R --no-save $Rparams'. Dies if R cannot be
# launched and warns if the R session ends with an error.
sub _run_R
{
    my ($script) = @_;

    open(my $rshell,"|R --no-save $Rparams ") || die "# $0 : cannot call R: $!\n";
    print {$rshell} $script;
    close($rshell) || warn "# $0 : R call failed (exit status $?)\n";
    return;
}

# Draws a bar plot of column $column of table $table in PNG file $png, using
# column 1 as bar names, $colors as bar colors (see get_Rplot_colors), $ylab
# as y-axis label and c(0,$ymax) as y-axis limits. $legend_cex is the R
# expression used as text magnification of the legend.
sub _barplot_R
{
    my ($png,$table,$column,$ylab,$ymax,$colors,$reg,$legend_cex) = @_;

    _run_R(<<EOR);
    png("$png",width=$RIMGWIDTH);
    colors = c($colors)
    data = read.table(file="$table",header=F,check.names=F);
    barplot( data\$V$column, names.arg=data\$V1, col=colors, ylab="$ylab", ylim = c(0,$ymax), cex.lab=$CEX, cex.axis=$CEX)
    legend("topright", inset=.05, c("$reg cluster","$RNDSAMPLES random clusters"),
        fill=c('black','grey'), cex=$legend_cex)
    dev.off()
    q()
EOR
    return;
}

# add source code and data to report
# (failures here are reported but do not abort, the report itself is already written)
if(! -d "$outdir/source")
{
    mkdir("$outdir/source") || warn "# $0 : cannot create $outdir/source: $!\n";
}

foreach my $file (@sourcefiles)
{
    system("cp $file $outdir/source");
}

# per-regulon input data: sequence (.fna) and text (.txt) files
foreach my $dir (@sourcedirs)
{
    if(! -d "$outdir/source/$dir")
    {
        mkdir("$outdir/source/$dir") || warn "# $0 : cannot create $outdir/source/$dir: $!\n";
    }
    system("cp $dir/*.fna $outdir/source/$dir");
    system("cp $dir/*.txt $outdir/source/$dir");
}

# report footer with the parameters of this run and a link to the copied sources
print REPORT "# parameters:\n ORTHSPECIES=$ORTHSPECIES\n".
    "RNDSAMPLES=$RNDSAMPLES MAXSIGGO=$MAXSIGGO MAXSIGMEME=$MAXSIGMEME MAXSIG=$MAXSIG MINCOR=$MINCOR MINNCOR=$MINNCOR\n".
    "input data and code: <a href=\"./source/\">source</a>\n";

# buffered write errors (e.g. disk full) only show up when the handle is closed
close(REPORT) || warn "# $0 : error while writing $outdir/report.tab: $!\n";

# make HTML report
system("text-to-html -i $outdir/report.tab -o $outdir/index.html") == 0
    or warn "# $0 : text-to-html failed (exit status $?)\n";

######################################

# Builds the comma-separated argument list of an R c() call holding one bar
# color per plotted item: 'black' at position $reference_idx (0-based) and
# 'grey' everywhere else. A negative $reference_idx yields $n_of_orgs grey bars.
sub get_Rplot_colors {
  my ($reference_idx,$n_of_orgs) = @_;

  # no reference bar to highlight
  return "rep('grey',$n_of_orgs)" if($reference_idx <= -1);

  my @pieces;

  # grey bars before the reference
  push(@pieces,"rep('grey',$reference_idx)") if($reference_idx > 0);

  push(@pieces,"'black'"); # reference

  # grey bars after the reference
  if($reference_idx < $n_of_orgs-1){
    my $n_after = $n_of_orgs-($reference_idx+1);
    push(@pieces,sprintf("rep('grey',%d)",$n_after));
  }

  return join(',',@pieces);
}

