; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.125221_V1BqtH.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 6 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.125221_GFR1LJ_6nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	6
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.125221_V1BqtH.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.125221_GFR1LJ_6nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/6nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	4.80769230769231e-06
; Sequence type                	DNA
; Nb of sequences              	486
; Sum of sequence lengths      	243000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	236480
; total overlapping occurrences	5068
; total non overlapping occ    	231412
; alphabet size                	4
; nb possible oligomers        	2080
; oligomers tested for significance	2080
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
agaaga	agaaga|tcttct	0.0013525885944	420	319.86	5e-08	1.0e-04	3.98	1	47	2100
aagaag	aagaag|cttctt	0.0012397965441	375	293.19	2.5e-06	5.2e-03	2.28	2	37	1875
acagag	acagag|ctctgt	0.0003910570769	136	92.48	1.3e-05	2.8e-02	1.55	3	0	680
ccttga	ccttga|tcaagg	0.0003007581664	108	71.12	2.8e-05	5.9e-02	1.23	4	0	540
cagaga	cagaga|tctctg	0.0004506789596	151	106.58	2.9e-05	6.1e-02	1.22	5	0	755
gaagaa	gaagaa|ttcttc	0.0013322543944	387	315.05	4.9e-05	1.0e-01	1.00	6	51	1935
atctgc	atctgc|gcagat	0.0002396802545	88	56.68	7e-05	1.5e-01	0.84	7	1	440
ctcttc	ctcttc|gaagag	0.0005950517798	187	140.72	0.00011	2.3e-01	0.63	8	3	935
ctgaga	ctgaga|tctcag	0.0002823569706	98	66.77	0.00020	4.2e-01	0.37	9	0	490
cagatc	cagatc|gatctg	0.0002410107639	86	56.99	0.00020	4.3e-01	0.37	10	5	430
tctgca	tctgca|tgcaga	0.0003074358173	105	72.70	0.00022	4.5e-01	0.34	11	5	525
acgaag	acgaag|cttcgt	0.0002820306192	97	66.69	0.00029	6.1e-01	0.22	12	0	485
gaagac	gaagac|gtcttc	0.0004224370152	136	99.90	0.00035	7.2e-01	0.14	13	0	680
cttcga	cttcga|tcgaag	0.0002642068143	91	62.48	0.00042	8.7e-01	0.06	14	3	455
; Host name	rsat
; Job started	2026-05-16.125222
; Job done	2026-05-16.125223
; Seconds	0.42
;	user	0.42
;	system	0.02
;	cuser	0.11
;	csystem	0.02
; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.125221_V1BqtH.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 7 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.125221_GFR1LJ_7nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	7
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.125221_V1BqtH.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.125221_GFR1LJ_7nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/7nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	1.220703125e-06
; Sequence type                	DNA
; Nb of sequences              	486
; Sum of sequence lengths      	243000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	235658
; total overlapping occurrences	4070
; total non overlapping occ    	231588
; alphabet size                	4
; nb possible oligomers        	8192
; oligomers tested for significance	8192
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
acagaga	acagaga|tctctgt	0.0001755705182	73	41.37	5.6e-06	4.6e-02	1.34	1	0	438
caacttg	caacttg|caagttg	0.0001634405224	68	38.52	1.1e-05	9.1e-02	1.04	2	0	408
ctgtgcc	ctgtgcc|ggcacag	0.0000164964447	15	3.89	1.4e-05	1.2e-01	0.93	3	0	90
acgtggg	acgtggg|cccacgt	0.0000356980770	23	8.41	2.5e-05	2.0e-01	0.70	4	1	138
agaagag	agaagag|ctcttct	0.0002637268983	95	62.15	6.5e-05	5.3e-01	0.28	5	2	570
ctgcaga	ctgcaga|tctgcag	0.0000435498586	25	10.26	7e-05	5.7e-01	0.24	6	5	150
aagaaga	aagaaga|tcttctt	0.0006334149443	198	149.27	8e-05	6.6e-01	0.18	7	32	1188
; Host name	rsat
; Job started	2026-05-16.125223
; Job done	2026-05-16.125224
; Seconds	0.84
;	user	0.84
;	system	0.04
;	cuser	0.14
;	csystem	0.02
; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.125221_V1BqtH.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 8 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.125221_GFR1LJ_8nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	8
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.125221_V1BqtH.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.125221_GFR1LJ_8nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/8nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	3.03988326848249e-07
; Sequence type                	DNA
; Nb of sequences              	486
; Sum of sequence lengths      	243000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	235262
; total overlapping occurrences	2056
; total non overlapping occ    	233206
; alphabet size                	4
; nb possible oligomers        	32896
; oligomers tested for significance	32896
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
ggcacaga	ggcacaga|tctgtgcc	0.0000065446000	10	1.54	5.1e-06	1.7e-01	0.77	1	0	70
atgtggga	atgtggga|tcccacat	0.0000313560644	22	7.38	1e-05	3.3e-01	0.48	2	0	154
aacagaga	aacagaga|tctctgtt	0.0000768773007	38	18.09	2.9e-05	9.7e-01	0.01	3	0	266
; Host name	rsat
; Job started	2026-05-16.125224
; Job done	2026-05-16.125228
; Seconds	4.02
;	user	4.02
;	system	0.15
;	cuser	0.27
;	csystem	0.03
