; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124457_fWnOqA.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 6 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124457_ceJlAB_6nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	6
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124457_fWnOqA.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124457_ceJlAB_6nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/6nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	4.80769230769231e-06
; Sequence type                	DNA
; Nb of sequences              	498
; Sum of sequence lengths      	249000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	243215
; total overlapping occurrences	5539
; total non overlapping occ    	237676
; alphabet size                	4
; nb possible oligomers        	2080
; oligomers tested for significance	2080
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
aacgtg	aacgtg|cacgtt	0.0002922730311	122	71.09	2.6e-08	5.4e-05	4.27	1	0	610
acgtga	acgtga|tcacgt	0.0002991013056	116	72.75	1.8e-06	3.8e-03	2.42	2	3	580
acgtgc	acgtgc|gcacgt	0.0001205118008	58	29.31	1.9e-06	3.9e-03	2.40	3	1	290
tatata	tatata|tatata	0.0018867755601	559	458.89	3.3e-06	6.8e-03	2.17	4	332	2795
acgttg	acgttg|caacgt	0.0002647842051	104	64.40	3.5e-06	7.3e-03	2.14	5	4	520
atataa	atataa|ttatat	0.0027383266698	775	666.00	2e-05	4.1e-02	1.38	6	76	3875
caacta	caacta|tagttg	0.0006827147754	220	166.05	3.7e-05	7.6e-02	1.12	7	0	1100
cacgta	cacgta|tacgtg	0.0002556463671	95	62.18	6.6e-05	1.4e-01	0.87	8	1	475
aataat	aataat|attatt	0.0028732101965	801	698.81	8.1e-05	1.7e-01	0.77	9	66	4005
tataaa	tataaa|tttata	0.0027829614940	774	676.86	0.00013	2.8e-01	0.55	10	66	3870
aaagac	aaagac|gtcttt	0.0007922935200	245	192.70	0.00016	3.4e-01	0.47	11	0	1225
cgcacg	cgcacg|cgtgcg	0.0000537855000	28	13.08	0.00023	4.7e-01	0.33	12	0	140
agcgaa	agcgaa|ttcgct	0.0001793052532	69	43.61	0.00023	4.8e-01	0.31	13	0	345
cgttga	cgttga|tcaacg	0.0003099964203	107	75.40	0.00035	7.3e-01	0.14	14	0	535
acatgg	acatgg|ccatgt	0.0004047387300	134	98.44	0.00038	7.9e-01	0.10	15	0	670
atatag	atatag|ctatat	0.0012114541838	354	294.64	0.00043	8.9e-01	0.05	16	11	1770
; Host name	rsat
; Job started	2026-05-16.124458
; Job done	2026-05-16.124458
; Seconds	0.4
;	user	0.4
;	system	0.01
;	cuser	0.14
;	csystem	0.02
; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124457_fWnOqA.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 7 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124457_ceJlAB_7nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	7
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124457_fWnOqA.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124457_ceJlAB_7nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/7nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	1.220703125e-06
; Sequence type                	DNA
; Nb of sequences              	498
; Sum of sequence lengths      	249000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	242336
; total overlapping occurrences	4452
; total non overlapping occ    	237884
; alphabet size                	4
; nb possible oligomers        	8192
; oligomers tested for significance	8192
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
tatataa	tatataa|ttatata	0.0009847318348	336	238.64	1.6e-09	1.3e-05	4.88	1	77	2016
caacgtg	caacgtg|cacgttg	0.0000507724909	35	12.30	9.4e-08	7.7e-04	3.11	2	0	210
aacgtga	aacgtga|tcacgtt	0.0001002991127	49	24.31	6.9e-06	5.7e-02	1.24	3	0	294
acgtgcg	acgtgcg|cgcacgt	0.0000219071274	18	5.31	1.2e-05	9.8e-02	1.01	4	0	108
ctatata	ctatata|tatatag	0.0004961597638	166	120.24	4.5e-05	3.7e-01	0.44	5	13	996
aacgtgc	aacgtgc|gcacgtt	0.0000330808166	21	8.02	9.7e-05	7.9e-01	0.10	6	0	126
acacaca	acacaca|tgtgtgt	0.0002339303942	87	56.69	0.00011	9.1e-01	0.04	7	33	522
; Host name	rsat
; Job started	2026-05-16.124458
; Job done	2026-05-16.124459
; Seconds	0.8
;	user	0.8
;	system	0.03
;	cuser	0.16
;	csystem	0.02
; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124457_fWnOqA.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 8 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124457_ceJlAB_8nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	8
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124457_fWnOqA.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124457_ceJlAB_8nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/8nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	3.03988326848249e-07
; Sequence type                	DNA
; Nb of sequences              	498
; Sum of sequence lengths      	249000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	241920
; total overlapping occurrences	2168
; total non overlapping occ    	239752
; alphabet size                	4
; nb possible oligomers        	32896
; oligomers tested for significance	32896
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
acaacgtg	acaacgtg|cacgttgt	0.0000153770788	16	3.72	2e-06	6.5e-02	1.18	1	0	112
tatataaa	tatataaa|tttatata	0.0004578314173	161	110.76	4.5e-06	1.5e-01	0.83	2	7	1127
caacgtga	caacgtga|tcacgttg	0.0000166855941	16	4.04	5.5e-06	1.8e-01	0.74	3	0	112
; Host name	rsat
; Job started	2026-05-16.124459
; Job done	2026-05-16.124503
; Seconds	3.78
;	user	3.79
;	system	0.14
;	cuser	0.25
;	csystem	0.06
