; oligo-analysis -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/11/tmp_sequence_2026-05-11.133234_ntyPW4.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return freq,mseq,occ,proba,rank,ratio,zscore -1str -noov -quick_if_possible -seqtype dna -markov 2 -pseudo 0.01 -l 8 -o $RSAT/public_html/tmp/www-data/2026/05/11/oligo-analysis_2026-05-11.133234_GlhVyT_8nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42.
; Program version 1.169
; Slow counting mode
; Detection of over-represented words (right-tail test)
; Oligomer length 8
; Input file $RSAT/public_html/tmp/www-data/2026/05/11/tmp_sequence_2026-05-11.133234_ntyPW4.fasta.purged
; Input format fasta
; Output file $RSAT/public_html/tmp/www-data/2026/05/11/oligo-analysis_2026-05-11.133234_GlhVyT_8nt.tab
; Discard overlapping matches
; Counted on a single strand
; Background model Markov
; Background estimation method Markov model estimated from input sequences
; Markov chain order 2
; Pseudo-frequency 0.01
; Pseudo-frequency per oligo 1.52587890625e-07
; Sequence type DNA
; Nb of sequences 45
; Sum of sequence lengths 9000
; discarded residues (other letters than ACGT)
; discarded occurrences (contain discarded residues)
; nb possible positions 8685
; total oligo occurrences 8685
; total overlapping occurrences 42
; total non overlapping occ 8643
; alphabet size 4
; nb possible oligomers 65536
; oligomers tested for significance 7345
; Sequences:
; snR84|gene_len=550 200
; snR13|gene_len=124 200
; snR63|gene_len=255 200
; snR47|gene_len=99 200
; snR8|gene_len=190 200
; snR58|gene_len=96 200
; snR9|gene_len=187 200
; snR81|gene_len=201 200
; snR62|gene_len=100 200
; snR17a|gene_len=490 200
; snR35|gene_len=204 200
; snR36|gene_len=182 200
; snR50|gene_len=90 200
; snR46|gene_len=197 200
; snR10|gene_len=245 200
; snR48|gene_len=113 200
; snR39B|gene_len=96 200
; snR82|gene_len=268 200
; snR61|gene_len=90 200
; snR30|gene_len=606 200
; snR34|gene_len=203 200
; snR17b|gene_len=462 200
; snR45|gene_len=172 200
; snR83|gene_len=306 200
; snR11|gene_len=258 200
; snR86|gene_len=1004 200
; snR56|gene_len=88 200
; snR40|gene_len=97 200
; snR66|gene_len=86 200
; snR49|gene_len=165 200
; snR3|gene_len=194 200
; snR128|gene_len=126 200
; snR37|gene_len=386 200
; snR69|gene_len=101 200
; snR87|gene_len=109 200
; snR42|gene_len=351 200
; snR64|gene_len=101 200
; snR53|gene_len=91 200
; snR80|gene_len=171 200
; snR52|gene_len=92 200
; snR4|gene_len=186 200
; snR32|gene_len=188 200
; snR71|gene_len=90 200
; snR65|gene_len=100 200
; snR189|gene_len=189 200
;
; column headers
; 1 seq oligomer sequence
; 2 id oligomer identifier
; 3 obs_freq observed relative frequency
; 4 exp_freq expected relative frequency
; 5 occ observed occurrences
; 6 exp_occ expected occurrences
; 7 occ_P occurrence probability (binomial)
; 8 occ_E E-value for occurrences (binomial)
; 9 occ_sig occurrence significance (binomial)
; 10 rank rank
; 11 ovl_occ number of overlapping occurrences (discarded from the count)
; 12 forbocc forbidden positions (to avoid self-overlap)
; 13 zscore z-score (Gaussian approximation)
; 14 exp_var estimation of the variance on occurrences
; 15 ratio observed/expected ratio
; 16 ms number of matching sequences
; 17 exp_ms expected number of matching sequences
; 18 ms_P matching sequence probability (binomial)
; 19 ms_E E-value for matching sequences (binomial)
; 20 ms_sig matching sequence significance (binomial)
; 21 ms_freq proportion of matching sequences (sequences with at least one occurrence)
; 22 exp_msf expected proportion matching sequences
; 23 ms_rati observed/expected numbers of matching sequences
#seq id obs_freq exp_freq occ exp_occ occ_P occ_E occ_sig rank ovl_occ forbocc zscore exp_var ratio ms exp_ms ms_P ms_E ms_sig ms_freq exp_msf ms_rati
aagtgacc aagtgacc 0.0003454231434 0.0000066516259 3 0.06 3.1e-05 2.3e-01 0.65 1 0 21 12.24 0.058 51.9306 3 0.06 2.9e-05 1.9 -0.3 0.06667 0.00128 51.96380
acgcttcc acgcttcc 0.0003454231434 0.0000090228170 3 0.08 7.6e-05 5.6e-01 0.26 2 0 21 10.44 0.078 38.2833 2 0.08 0.00285 1.9e+02 -2.3 0.04444 0.00174 25.54431
acgtagaa acgtagaa 0.0004605641911 0.0000271328872 4 0.24 0.00011 7.8e-01 0.11 3 0 28 7.75 0.236 16.9744 4 0.24 9.3e-05 6.1 -0.8 0.08889 0.00522 17.01864
gtgtacgg gtgtacgg 0.0003454231434 0.0000101711576 3 0.09 0.00011 7.9e-01 0.10 4 0 21 9.80 0.088 33.9610 3 0.09 0.00010 6.6 -0.8 0.06667 0.00196 33.99422
tacgtaga tacgtaga 0.0004605641911 0.0000276821867 4 0.24 0.00011 8.4e-01 0.07 5 0 28 7.67 0.240 16.6376 4 0.24 0.00010 6.6 -0.8 0.08889 0.00533 16.68182
; Host name rsat
; Job started 2026-05-11.133235
; Job done 2026-05-11.133235
; Seconds 0.45
; user 0.45
; system 0.04
; cuser 0
; csystem 0