; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124906_NgglST.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 6 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124906_fgMS9b_6nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	6
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124906_NgglST.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124906_fgMS9b_6nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/6nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	4.80769230769231e-06
; Sequence type                	DNA
; Nb of sequences              	491
; Sum of sequence lengths      	245500
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	240194
; total overlapping occurrences	5209
; total non overlapping occ    	234985
; alphabet size                	4
; nb possible oligomers        	2080
; oligomers tested for significance	2080
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
gtcaac	gtcaac|gttgac	0.0004297171609	158	103.22	3.3e-07	6.9e-04	3.16	1	0	790
aagtca	aagtca|tgactt	0.0007169314601	222	172.20	0.00015	3.2e-01	0.50	2	0	1110
cacgtc	cacgtc|gacgtg	0.0001751379974	67	42.07	0.00024	5.0e-01	0.31	3	1	335
; Host name	rsat
; Job started	2026-05-16.124907
; Job done	2026-05-16.124907
; Seconds	0.33
;	user	0.33
;	system	0.03
;	cuser	0.12
;	csystem	0.01
