; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124744_SQuess.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 6 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124744_M92ZkP_6nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	6
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124744_SQuess.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124744_M92ZkP_6nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/6nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	4.80769230769231e-06
; Sequence type                	DNA
; Nb of sequences              	440
; Sum of sequence lengths      	220000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	216034
; total overlapping occurrences	4521
; total non overlapping occ    	211513
; alphabet size                	4
; nb possible oligomers        	2080
; oligomers tested for significance	2080
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
acgtgg	acgtgg|ccacgt	0.0002373957950	90	51.29	6.4e-07	1.3e-03	2.88	1	8	450
acgtca	acgtca|tgacgt	0.0002638553590	89	57.00	5.3e-05	1.1e-01	0.96	2	2	445
aagtct	aagtct|agactt	0.0005953781311	175	128.62	5.9e-05	1.2e-01	0.91	3	0	875
cacgtc	cacgtc|gacgtg	0.0001751379974	64	37.84	6.6e-05	1.4e-01	0.86	4	0	320
acgttg	acgttg|caacgt	0.0002647842051	88	57.20	9.4e-05	2.0e-01	0.71	5	7	440
aaagac	aaagac|gtcttt	0.0007922935200	221	171.16	0.00015	3.0e-01	0.52	6	0	1105
gagaca	gagaca|tgtctc	0.0004481183567	133	96.81	0.00028	5.8e-01	0.23	7	0	665
aagact	aagact|agtctt	0.0006139048467	174	132.62	0.00033	7.0e-01	0.16	8	0	870
aacgtg	aacgtg|cacgtt	0.0002922730311	92	63.14	0.00039	8.0e-01	0.09	9	0	460
aaccac	aaccac|gtggtt	0.0004496748016	132	97.15	0.00045	9.3e-01	0.03	10	0	660
; Host name	rsat
; Job started	2026-05-16.124745
; Job done	2026-05-16.124746
; Seconds	0.35
;	user	0.35
;	system	0.02
;	cuser	0.11
;	csystem	0.01
; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124744_SQuess.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 7 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124744_M92ZkP_7nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	7
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124744_SQuess.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124744_M92ZkP_7nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/7nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	1.220703125e-06
; Sequence type                	DNA
; Nb of sequences              	440
; Sum of sequence lengths      	220000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	215308
; total overlapping occurrences	3623
; total non overlapping occ    	211685
; alphabet size                	4
; nb possible oligomers        	8192
; oligomers tested for significance	8192
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
aaagacc	aaagacc|ggtcttt	0.0001126052703	53	24.24	3e-07	2.5e-03	2.61	1	0	318
aacgtgg	aacgtgg|ccacgtt	0.0000602852262	30	12.98	3.7e-05	3.0e-01	0.52	2	0	180
acgtgga	acgtgga|tccacgt	0.0000836392430	37	18.01	5.8e-05	4.8e-01	0.32	3	1	222
gagccac	gagccac|gtggctc	0.0000398001295	22	8.57	8.9e-05	7.3e-01	0.14	4	0	132
aaacgtg	aaacgtg|cacgttt	0.0001197020728	47	25.77	0.00011	9.0e-01	0.04	5	0	282
; Host name	rsat
; Job started	2026-05-16.124746
; Job done	2026-05-16.124747
; Seconds	0.8
;	user	0.8
;	system	0.03
;	cuser	0.13
;	csystem	0.02
; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124744_SQuess.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -2str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Arabidopsis_thaliana.TAIR10.60 -pseudo 0.01 -l 8 -o $RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124744_M92ZkP_8nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	8
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/16/tmp_sequence_2026-05-16.124744_SQuess.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/16/oligo-analysis_2026-05-16.124744_M92ZkP_8nt.tab
; Discard overlapping matches
; Counted on both strands
; 	grouped by pairs of reverse complements
; Background model             	upstream-noorf
; Organism                     	Arabidopsis_thaliana.TAIR10.60
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Arabidopsis_thaliana.TAIR10.60/oligo-frequencies/8nt_upstream-noorf_Arabidopsis_thaliana.TAIR10.60-noov-2str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	3.03988326848249e-07
; Sequence type                	DNA
; Nb of sequences              	440
; Sum of sequence lengths      	220000
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	214942
; total overlapping occurrences	1772
; total non overlapping occ    	213170
; alphabet size                	4
; nb possible oligomers        	32896
; oligomers tested for significance	32896
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
aaacgtgg	aaacgtgg|ccacgttt	0.0000235552998	17	5.06	2.3e-05	7.6e-01	0.12	1	0	119
; Host name	rsat
; Job started	2026-05-16.124747
; Job done	2026-05-16.124751
; Seconds	3.86
;	user	3.86
;	system	0.12
;	cuser	0.27
;	csystem	0.02
