; oligo-analysis  -v 1 -sort -i $RSAT/public_html/tmp/www-data/2026/05/02/tmp_sequence_2026-05-02.224300_aU9YyQ.fasta.purged -format fasta -lth occ_sig 0 -uth rank 50 -return occ,proba,rank -1str -noov -quick_if_possible -seqtype dna -bg upstream-noorf -org Homo_sapiens_GRCh38 -pseudo 0.01 -l 4 -o $RSAT/public_html/tmp/www-data/2026/05/02/oligo-analysis_2026-05-02.224300_t2l33k_4nt.tab
; Citation: van Helden et al. (1998). J Mol Biol 281(5), 827-42. 
; Program version              	1.169
; Quick counting mode          
; Detection of over-represented words (right-tail test)
; Oligomer length              	4
; Input file                   	$RSAT/public_html/tmp/www-data/2026/05/02/tmp_sequence_2026-05-02.224300_aU9YyQ.fasta.purged
; Input format                 	fasta
; Output file                  	$RSAT/public_html/tmp/www-data/2026/05/02/oligo-analysis_2026-05-02.224300_t2l33k_4nt.tab
; Discard overlapping matches
; Counted on a single strand
; Background model             	upstream-noorf
; Organism                     	Homo_sapiens_GRCh38
; Background estimation method 	Frequency file
; Expected frequency file      	$RSAT/public_html/data/genomes/Homo_sapiens_GRCh38/oligo-frequencies/4nt_upstream-noorf_Homo_sapiens_GRCh38-noov-1str.freq
; Pseudo-frequency             	0.01
; Pseudo-frequency per oligo   	3.90625e-05
; Sequence type                	DNA
; Nb of sequences              	3205
; Sum of sequence lengths      	99355
; discarded residues           	NA (quick mode)	 (other letters than ACGT)
; discarded occurrences        	NA (quick mode)	 (contain discarded residues)
; nb possible positions        	NA (quick mode)
; total oligo occurrences      	89740
; total overlapping occurrences	2392
; total non overlapping occ    	87348
; alphabet size                	4
; nb possible oligomers        	256
; oligomers tested for significance	256
;
; column headers
;	1	seq            	oligomer sequence
;	2	id             	oligomer identifier
;	3	exp_freq       	expected relative frequency
;	4	occ            	observed occurrences
;	5	exp_occ        	expected occurrences
;	6	occ_P          	occurrence probability (binomial)
;	7	occ_E          	E-value for occurrences (binomial)
;	8	occ_sig        	occurrence significance (binomial)
;	9	rank           	rank
;	10	ovl_occ        	number of overlapping occurrences (discarded from the count)
;	11	forbocc        	forbidden positions (to avoid self-overlap)
#seq	id	exp_freq	occ	exp_occ	occ_P	occ_E	occ_sig	rank	ovl_occ	forbocc
taaa	taaa	0.0071564882806	2781	642.22	0	0e+00	350.00	1	0	8343
aaat	aaat	0.0087707085475	2619	787.08	0	0e+00	350.00	2	0	7857
aata	aata	0.0061745157995	2171	554.10	0	0e+00	350.00	3	55	6513
ataa	ataa	0.0055105383204	2018	494.52	0	0e+00	350.00	4	113	6054
ttaa	ttaa	0.0056973613736	1370	511.28	8e-218	2e-215	214.66	5	0	4110
aaaa	aaaa	0.0085717736607	1694	769.23	1e-183	2e-181	180.61	6	892	5082
aatt	aatt	0.0064547575759	1332	579.25	1e-158	3e-156	155.48	7	0	3996
taat	taat	0.0049782437468	1034	446.75	5e-125	1e-122	121.90	8	19	3102
ttgt	ttgt	0.0048528434501	945	435.49	1.2e-99	3.1e-97	96.51	9	30	2835
ttta	ttta	0.0069422174954	1218	622.99	1.9e-99	4.8e-97	96.32	10	0	3654
attt	attt	0.0085264280498	1398	765.16	2e-94	5.1e-92	91.29	11	0	4194
tgtt	tgtt	0.0053225710120	983	477.65	1.3e-91	3.4e-89	88.47	12	15	2949
tatt	tatt	0.0059966668737	1047	538.14	1.4e-84	3.5e-82	81.46	13	33	3141
atta	atta	0.0050087428261	905	449.48	4.8e-80	1.2e-77	76.91	14	27	2715
ttat	ttat	0.0053384682433	883	479.07	1e-61	2.6e-59	58.59	15	32	2649
tttt	tttt	0.0081520406963	1189	731.56	6.8e-55	1.7e-52	51.76	16	615	3567
aatg	aatg	0.0051888154932	821	465.64	2.1e-50	5.4e-48	47.27	17	0	2463
attg	attg	0.0036970305347	619	331.77	2.6e-45	6.7e-43	42.17	18	0	1857
tttg	tttg	0.0064770669544	951	581.25	2.7e-45	6.8e-43	42.17	19	0	2853
gttt	gttt	0.0050483599644	780	453.04	1.7e-44	4.2e-42	41.37	20	0	2340
aaac	aaac	0.0052412281395	794	470.35	1.9e-42	5.0e-40	39.30	21	0	2382
tttc	tttc	0.0066788013083	958	599.36	7.1e-42	1.8e-39	38.74	22	0	2874
atgt	atgt	0.0046867609223	725	420.59	1.2e-41	3.1e-39	38.52	23	0	2175
cttt	cttt	0.0065206637978	927	585.16	3.2e-39	8.2e-37	36.09	24	0	2781
attc	attc	0.0041475936208	649	372.21	7.5e-39	1.9e-36	35.71	25	0	1947
tgaa	tgaa	0.0057265866594	834	513.90	8.8e-39	2.2e-36	35.65	26	0	2502
ttct	ttct	0.0068076415672	956	610.92	1.6e-38	4.1e-36	35.38	27	21	2868
ttga	ttga	0.0045874554019	697	411.68	7.8e-38	2.0e-35	34.70	28	0	2091
catt	catt	0.0051018664899	753	457.84	6.8e-37	1.7e-34	33.76	29	0	2259
actt	actt	0.0045446285918	687	407.83	1.1e-36	2.8e-34	33.56	30	0	2061
tctt	tctt	0.0058890709004	830	528.49	3.8e-34	9.9e-32	31.01	31	17	2490
aact	aact	0.0042820831901	637	384.27	2.4e-32	6.1e-30	29.21	32	0	1911
atat	atat	0.0049684636032	715	445.87	4.5e-32	1.2e-29	28.94	33	86	2145
tata	tata	0.0042120245452	618	377.99	5.7e-30	1.5e-27	26.83	34	64	1854
tgta	tgta	0.0043039391844	626	386.24	2.1e-29	5.3e-27	26.27	35	0	1878
gaat	gaat	0.0041884773560	581	375.87	5.8e-23	1.5e-20	19.83	36	0	1743
tcat	tcat	0.0044575068706	600	400.02	6.2e-21	1.6e-18	17.80	37	9	1800
gtgt	gtgt	0.0035086314320	486	314.86	2.2e-19	5.6e-17	16.25	38	52	1458
caat	caat	0.0037625697314	514	337.65	2.6e-19	6.7e-17	16.18	39	0	1542
gtat	gtat	0.0028927918343	414	259.60	6.3e-19	1.6e-16	15.79	40	0	1242
ttca	ttca	0.0056612705565	708	508.04	2.6e-17	6.7e-15	14.18	41	0	2124
tact	tact	0.0033990851871	457	305.03	2.8e-16	7.2e-14	13.14	42	11	1371
ctta	ctta	0.0033099988013	443	297.04	1.6e-15	4.0e-13	12.40	43	0	1329
tatg	tatg	0.0034591118083	458	310.42	2.7e-15	6.9e-13	12.16	44	0	1374
tgtg	tgtg	0.0049498172808	615	444.20	9.2e-15	2.4e-12	11.63	45	62	1845
tgat	tgat	0.0040331968853	511	361.94	8.2e-14	2.1e-11	10.68	46	8	1533
ttac	ttac	0.0032559957122	423	292.19	3.9e-13	1.0e-10	10.00	47	0	1269
atac	atac	0.0029517605592	388	264.89	8.1e-13	2.1e-10	9.68	48	0	1164
aaag	aaag	0.0067581939094	782	606.48	4.2e-12	1.1e-09	8.97	49	0	2346
ctaa	ctaa	0.0036144282618	442	324.36	3.1e-10	8.0e-08	7.09	50	0	1326
; Host name	rsat
; Job started	2026-05-02.224301
; Job done	2026-05-02.224302
; Seconds	0.18
;	user	0.18
;	system	0.03
;	cuser	0.13
;	csystem	0.02
