View Single Post
Old 04-08-2014, 06:28 AM   #79
nbahlis
Member
 
Location: Canada

Join Date: May 2013
Posts: 25
Default

Quote:
Originally Posted by dpryan View Post
Then presumably that should work fine.
here is the script I've written:
> sampleFiles <- list.files(path="~/Desktop/HTseq_counts/", pattern="*.counts")
> Table3 <- data.frame(
+ sampleName = sampleFiles, fileName = sampleFiles,
+ condition = c( "pre", "pre", "pre", "pre", "pre", "pre", "pre", "pre", "pre", "pre", "pre", "pre", "post", "post", "post", "post", "post", "post", "post", "post", "post", "post", "post", "post"),
+ libType = c("pair8", "pair10", "pair9", "pair1", "pair7", "pair11", "pair2", "pair3", "pair4", "pair12", "pair13", "pair6", "pair7", "pair1", "pair3", "pair4", "pair11", "pair2", "pair6", "pair10", "pair9", "pair8", "pair12", "pair13"))
> directory <- c("~/Desktop/HTseq_counts/")
> design <- formula(~ libType + condition)
> ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable= Table3, directory= directory, design= design)
Usage note: the following factors have 3 or more levels:

libType

For DESeq2 versions < 1.3, if you plan on extracting results for
these factors, we recommend using betaPrior=FALSE as an argument
when calling DESeq().
As currently implemented in version 1.2, the log2 fold changes can
vary if the base level is changed, when extracting results for a
factor with 3 or more levels. A solution will be implemented in
version 1.3 which allows for the use of a beta prior and symmetric
log2 fold change estimates regardless of the selection of base level.
> Table3
sampleName fileName condition libType
1 P110.counts P110.counts pre pair8
2 P124.counts P124.counts pre pair10
3 P149.counts P149.counts pre pair9
4 P185.counts P185.counts pre pair1
5 P189.counts P189.counts pre pair7
6 P192.counts P192.counts pre pair11
7 P218.counts P218.counts pre pair2
8 P227.counts P227.counts pre pair3
9 P235.counts P235.counts pre pair4
10 P274.counts P274.counts pre pair12
11 P321.counts P321.counts pre pair13
12 P351.counts P351.counts pre pair6
13 P357.counts P357.counts post pair7
14 P367.counts P367.counts post pair1
15 P377.counts P377.counts post pair3
16 P384.counts P384.counts post pair4
17 P426.counts P426.counts post pair11
18 P543.counts P543.counts post pair2
19 P584.counts P584.counts post pair6
20 P590.counts P590.counts post pair10
21 P594.counts P594.counts post pair9
22 P610.counts P610.counts post pair8
23 P653.counts P653.counts post pair12
24 P654.counts P654.counts post pair13
> ddsHTSeq
class: DESeqDataSet
dim: 51911 24
exptData(0):
assays(1): counts
rownames(51911): ENSG00000000003 ENSG00000000005 ... ENSG00000259170
ENSG00000259171
rowData metadata column names(0):
colnames(24): P110.counts P124.counts ... P653.counts P654.counts
colData names(2): condition libType
> dds <- DESeq(ddsHTSeq)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
1515 rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument with nbinomWaldTest
> res <- results(dds)
> res <- res[order(res$padj),]
> plotMA(dds, main="DESeq2", ylim=c(-2,2))
> plotDispEsts(dds)
> hist( res$pvalue, breaks=100)
> filterThreshold <- 5
> keep <- rowMeans( counts( dds, normalized=TRUE)) > filterThreshold
> table(p.adjust(res$pvalue, method="BH") < 0.1)

FALSE TRUE
44016 1036
> table(p.adjust(res$pvalue[keep], method="BH") < 0.1)

FALSE TRUE
17543 1684
> hist( res$pvalue[keep], breaks=100)
> reskeep <- results(dds[keep])
> reskeep <- reskeep[order(reskeep$padj),]
> library( "org.Hs.eg.db" )
Loading required package: AnnotationDbi
Loading required package: Biobase
Welcome to Bioconductor

Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: DBI

> cols(org.Hs.eg.db)
[1] "ENTREZID" "PFAM" "IPI" "PROSITE" "ACCNUM"
[6] "ALIAS" "CHR" "CHRLOC" "CHRLOCEND" "ENZYME"
[11] "MAP" "PATH" "PMID" "REFSEQ" "SYMBOL"
[16] "UNIGENE" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS" "GENENAME"
[21] "UNIPROT" "GO" "EVIDENCE" "ONTOLOGY" "GOALL"
[26] "EVIDENCEALL" "ONTOLOGYALL" "OMIM" "UCSCKG"
Warning message:
In cols(org.Hs.eg.db) :
'cols' has been deprecated and replaced by 'columns' for versions of
Bioc that are higher than 2.13. Please use 'columns' anywhere that
you previously used 'cols'
> convertIDs <- function( ids, fromKey, toKey, db, ifMultiple=c( "putNA", "useFirst" ) ) {
+ stopifnot( inherits( db, "AnnotationDb" ) )
+ ifMultiple <- match.arg( ifMultiple )
+ suppressWarnings( selRes <- AnnotationDbi::select(
+ db, keys=ids, keytype=fromKey, cols=c(fromKey,toKey) ) )
+ if( ifMultiple == "putNA" ) {
+ duplicatedIds <- selRes[ duplicated( selRes[,1] ), 1 ]
+ selRes <- selRes[ ! selRes[,1] %in% duplicatedIds, ] }
+ return( selRes[ match( ids, selRes[,1] ), 2 ] ) }
> reskeep$symbol <- convertIDs( row.names(reskeep), "ENSEMBL", "SYMBOL", org.Hs.eg.db )
> head(reskeep)
DataFrame with 6 rows and 7 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
ENSG00000118193 352.79648 -1.3133381 0.1805654 -7.273477 3.503503e-13
ENSG00000090889 486.81411 -1.5000754 0.2177141 -6.890116 5.574692e-12
ENSG00000166851 591.71901 -1.4069394 0.2097238 -6.708535 1.965874e-11
ENSG00000213401 45.86075 -3.6684941 0.5520076 -6.645731 3.017157e-11
ENSG00000144554 1137.21388 -0.8662289 0.1334898 -6.489104 8.634842e-11
ENSG00000204592 27901.79642 0.6212955 0.0959501 6.475194 9.469050e-11
padj symbol
<numeric> <character>
ENSG00000118193 7.130679e-09 KIF14
ENSG00000090889 5.673085e-08 KIF4A
ENSG00000166851 1.333714e-07 PLK1
ENSG00000213401 1.535205e-07 NA
ENSG00000144554 3.212059e-07 FANCD2
ENSG00000204592 3.212059e-07 HLA-E

> select <- order(p.adjust( reskeep$pvalue, method="BH" ) < .0005)
> heatmap.2( assay(rld) [ select, ], scale="row", cexRow=0.5, cexCol=0.75,
+ trace="none", dendrogram="column",
+ col = redgreen(75))
> select <- order(p.adjust( reskeep$pvalue[keep], method="BH" ) < .00005)
> heatmap.2(assay(rld)[select,],col = redgreen(75),trace="none", cexRow=0.75, cexCol=0.75)
> select <- order(p.adjust( reskeep$pvalue[keep], method="BH" ) < .00000005)
> heatmap.2( assay(rld) [ select, ], scale="row", cexRow=0.5, cexCol=0.75,
+ trace="none", dendrogram="column",
+ col = redgreen(75))
> select <- order(p.adjust( reskeep$padj[keep], method="BH" ) < .00000005)
> heatmap.2( assay(rld) [ select, ], scale="row", cexRow=0.5, cexCol=0.75,
+ trace="none", dendrogram="column",
+ col = redgreen(75))
nbahlis is offline   Reply With Quote