
## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(hgu95av2.db))


## ------------------------------------------------------------------------
## Affymetrix HG-U95Av2 array probe IDs of interest; these might be
## obtained from
##
##   tbl <- topTable(efit, coef=2)
##   ids <- tbl[["ID"]]
##
## as part of a more extensive workflow.
## Probe identifiers looked up against hgu95av2.db in the chunks below.
ids <- c(
  "39730_at",
  "1635_at",
  "1674_at",
  "40504_at",
  "40202_at"
)


## ------------------------------------------------------------------------
library(hgu95av2.db)


## ------------------------------------------------------------------------
## Kinds of data that select() can retrieve from this package.
columns(hgu95av2.db)


## ------------------------------------------------------------------------
## Kinds of identifiers that may be supplied as keys.
keytypes(hgu95av2.db)


## ------------------------------------------------------------------------
## Peek at the first few keys of type ENTREZID.
entrez_keys <- keys(hgu95av2.db, keytype = "ENTREZID")
head(entrez_keys)


## ------------------------------------------------------------------------
## Look up a single column for the probe IDs.
select(hgu95av2.db, keys = ids, keytype = "PROBEID", columns = "ENTREZID")


## ------------------------------------------------------------------------
## Same lookup, requesting several columns at once.
gene_fields <- c("ENTREZID", "GENENAME", "SYMBOL")
select(hgu95av2.db, keys = ids, keytype = "PROBEID", columns = gene_fields)


## ------------------------------------------------------------------------
## GO column for the first probe ID only.
res <- select(
  hgu95av2.db,
  keys = ids[1],
  keytype = "PROBEID",
  columns = "GO"
)
head(res)


## ------------------------------------------------------------------------
library(GO.db)
## The GO column retrieved above supplies the keys for the GO.db lookup.
go_ids <- res[["GO"]]
head(go_ids)
go_terms <- select(GO.db, keys = go_ids, keytype = "GOID", columns = "TERM")
head(go_terms)


## ------------------------------------------------------------------------
library(org.Hs.eg.db)
## Use the first two ENTREZID keys as a small worked example.
## (Variables are named so they do not mask the keys()/columns() generics.)
eg_ids <- head(keys(org.Hs.eg.db, keytype = "ENTREZID"), n = 2)
eg_fields <- c("PFAM", "GO", "SYMBOL")
select(org.Hs.eg.db, keys = eg_ids, columns = eg_fields, keytype = "ENTREZID")


## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(TxDb.Hsapiens.UCSC.hg19.knownGene))


## ------------------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
## Short alias for the package object; later chunks refer to `txdb`.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
## Use the first two GENEID keys as a small worked example.
gene_ids <- head(keys(txdb, keytype = "GENEID"), n = 2)
tx_fields <- c("TXNAME", "TXSTART", "TXSTRAND")
select(txdb, keys = gene_ids, columns = tx_fields, keytype = "GENEID")


## ------------------------------------------------------------------------
## Transcripts stored in the TxDb.
transcripts(txdb)


## ------------------------------------------------------------------------
## Exons stored in the TxDb.
exons(txdb)


## ------------------------------------------------------------------------
## Transcripts again, asking for specific metadata columns.
tx_meta <- c("tx_id", "tx_name", "gene_id")
transcripts(txdb, columns = tx_meta)


## ------------------------------------------------------------------------
## Transcripts grouped by gene.
transcriptsBy(txdb, by = "gene")


## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(Homo.sapiens))


## ------------------------------------------------------------------------
library(Homo.sapiens)
## Use the first two ENTREZID keys as a small worked example.
hs_ids <- head(keys(Homo.sapiens, keytype = "ENTREZID"), n = 2)
hs_fields <- c("SYMBOL", "TXNAME")
select(Homo.sapiens, keys = hs_ids, columns = hs_fields, keytype = "ENTREZID")


## ------------------------------------------------------------------------
## Transcripts annotated with the TXNAME and SYMBOL columns.
transcripts(Homo.sapiens, columns = c("TXNAME", "SYMBOL"))


## ----eval=FALSE----------------------------------------------------------
## gd <- list(join1 = c(GO.db="GOID", org.Hs.eg.db="GO"),
##       	   join2 = c(org.Hs.eg.db="ENTREZID",
##            TxDb.Hsapiens.UCSC.hg19.knownGene="GENEID"))
## 
## makeOrganismPackage(pkgname = "Homo.sapiens",
## 	            graphData = gd,
## 		    organism = "Homo sapiens",
## 		    version = "1.0.0",
## 		    maintainer = "Package Maintainer<maintainer@somewhere.org>",
## 		    author = "Some Body",
## 		    destDir = ".",
## 		    license = "Artistic-2.0")


## ------------------------------------------------------------------------
library(AnnotationHub)

## Create the hub object used by every AnnotationHub chunk in this script.
## NOTE(review): filters(), columns() and keys() on a hub look like the early
## AnnotationHub interface -- confirm they exist in the installed release.
ah <- AnnotationHub()


## ------------------------------------------------------------------------
## Retrieve one resource by name through `$`, then display it.
res <- ah$goldenpath.hg19.encodeDCC.wgEncodeUwTfbs.wgEncodeUwTfbsMcf7CtcfStdPkRep1.narrowPeak_0.0.1.RData

res


## ------------------------------------------------------------------------
## Size of the hub before any filter is set.
length(ah)


## ------------------------------------------------------------------------
## Filters currently set on the hub.
filters(ah)


## ------------------------------------------------------------------------
columns(ah)


## ------------------------------------------------------------------------
species_keys <- keys(ah, keytype = "Species")
head(species_keys)


## ------------------------------------------------------------------------
## Set a Species filter and check the hub size again.
cow_filter <- list(Species = "Bos taurus")
filters(ah) <- cow_filter

length(ah)


## ------------------------------------------------------------------------
library(biomaRt)
## Marts that are available, then connect to the "ensembl" mart.
marts <- listMarts()
head(marts)
ensembl <- useMart("ensembl")
ensembl


## ------------------------------------------------------------------------
## Datasets offered by the mart; reconnect with the human gene dataset chosen.
datasets <- listDatasets(ensembl)
head(datasets)
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
ensembl


## ------------------------------------------------------------------------
mart_filters <- listFilters(ensembl)
head(mart_filters)


## ------------------------------------------------------------------------
mart_attributes <- listAttributes(ensembl)
head(mart_attributes)


## ------------------------------------------------------------------------
## Map three probe IDs (filter 'affy_hg_u133_plus_2') to NCBI gene IDs.
affyids <- c("202763_at", "209310_s_at", "207500_at")
## The Ensembl mart names this attribute "entrezgene_id"; the older name
## "entrezgene" is rejected as an invalid attribute by current Ensembl marts.
getBM(
  attributes = c("affy_hg_u133_plus_2", "entrezgene_id"),
  filters = "affy_hg_u133_plus_2",
  values = affyids,
  mart = ensembl
)


## ------------------------------------------------------------------------
## With no filter given, getBM() is not restricted to particular values.
head(getBM(attributes = "affy_hg_u133_plus_2", mart = ensembl))


## ----eval=FALSE----------------------------------------------------------
## if (!requireNamespace("BiocManager", quietly = TRUE))
##     install.packages("BiocManager")
## BiocManager::install(c("hgu95av2.db", "GO.db"))


## ----eval=FALSE----------------------------------------------------------
## library(AnnotationDbi)
## library(GO.db)


## ----eval=FALSE----------------------------------------------------------
## browseVignettes(package="AnnotationDbi")


## ----eval=FALSE----------------------------------------------------------
## help.start()


## ------------------------------------------------------------------------
## Look up the gene symbol MSX2 on the chip.
## (Variables are named so they do not mask the keys()/columns() generics.)
msx2_symbol <- "MSX2"
msx2_fields <- c("ENTREZID", "PROBEID", "CHR")
select(hgu95av2.db, keys = msx2_symbol, columns = msx2_fields, keytype = "SYMBOL")


## ------------------------------------------------------------------------
## All SYMBOL keys from the chip package and from the organism package.
chipSymbols <- keys(hgu95av2.db, keytype = "SYMBOL")
orgSymbols <- keys(org.Hs.eg.db, keytype = "SYMBOL")
length(orgSymbols)
length(chipSymbols)


## ------------------------------------------------------------------------
## Dimensions of the SYMBOL -> ENTREZID tables from each package.
org_sym2eg <- select(
  org.Hs.eg.db,
  keys = orgSymbols,
  columns = "ENTREZID",
  keytype = "SYMBOL"
)
dim(org_sym2eg)
chip_sym2eg <- select(
  hgu95av2.db,
  keys = chipSymbols,
  columns = "ENTREZID",
  keytype = "SYMBOL"
)
dim(chip_sym2eg)


## ------------------------------------------------------------------------
## Does the organism package offer fewer columns than the chip package?
n_org_cols <- length(columns(org.Hs.eg.db))
n_chip_cols <- length(columns(hgu95av2.db))
n_org_cols < n_chip_cols


## ------------------------------------------------------------------------
chip_sym2probe <- select(
  hgu95av2.db,
  keys = chipSymbols,
  columns = "PROBEID",
  keytype = "SYMBOL"
)
head(chip_sym2probe)


## ------------------------------------------------------------------------
## SYMBOL -> ENTREZID table for every symbol in the organism package.
egr <- select(
  org.Hs.eg.db,
  keys = orgSymbols,
  columns = "ENTREZID",
  keytype = "SYMBOL"
)
## Total vs. distinct Entrez Gene IDs ...
entrez_col <- egr[["ENTREZID"]]
length(entrez_col)
length(unique(entrez_col))
## ... versus total vs. distinct symbols.
symbol_col <- egr[["SYMBOL"]]
length(symbol_col)
length(unique(symbol_col))
## So let's trap the symbols that are redundant and look more closely...
redund <- symbol_col
is_repeat <- duplicated(redund)
badSymbols <- redund[is_repeat]
select(org.Hs.eg.db, keys = badSymbols, columns = "ENTREZID", keytype = "SYMBOL")


## ------------------------------------------------------------------------
## Table-style answer: select() over every TXID key in the hg19 TxDb.
hg19_txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
all_tx_ids <- keys(hg19_txdb, keytype = "TXID")
res1 <- select(
  hg19_txdb,
  keys = all_tx_ids,
  columns = c("GENEID", "TXNAME", "TXCHROM"),
  keytype = "TXID"
)

head(res1)


## ------------------------------------------------------------------------
## Range-style answer: transcripts() with extra metadata columns.
res2 <- transcripts(
  TxDb.Hsapiens.UCSC.hg19.knownGene,
  columns = c("gene_id", "tx_name")
)
head(res2)


## ------------------------------------------------------------------------
## The same accessor applied to an Arabidopsis TxDb package.
library(TxDb.Athaliana.BioMart.plantsmart16)
res <- transcripts(
  TxDb.Athaliana.BioMart.plantsmart16,
  columns = "gene_id"
)


## ----eval=FALSE----------------------------------------------------------
## library(Homo.sapiens)
## keys <- keys(Homo.sapiens, keytype="TXID")
## res1 <- select(Homo.sapiens,
##                keys= keys,
##        	       columns=c("SYMBOL","TXSTART","TXCHROM"), keytype="TXID")
## 
## head(res1)


## ------------------------------------------------------------------------
library(Homo.sapiens)
## Transcripts annotated with the SYMBOL column.
res2 <- transcripts(Homo.sapiens, columns = "SYMBOL")
head(res2)


## ------------------------------------------------------------------------
## Compare the columns offered by Homo.sapiens with those of the organism
## and TxDb packages.
columns(Homo.sapiens)
columns(org.Hs.eg.db)
columns(TxDb.Hsapiens.UCSC.hg19.knownGene)
## You might also want to look at this:
symbol_and_loc <- c("SYMBOL", "CHRLOC")
transcripts(Homo.sapiens, columns = symbol_and_loc)


## ------------------------------------------------------------------------
## Key types that the hub object accepts.
keytypes(ah)


## ------------------------------------------------------------------------
## All DataProvider keys, and the first few Genome keys.
keys(ah, keytype = "DataProvider")
genome_keys <- keys(ah, keytype = "Genome")
head(genome_keys)


## ------------------------------------------------------------------------
## Clear whatever filter is set, then filter on three fields at once.
filters(ah) <- NULL
human_ucsc_hg19 <- list(
  Species = "Homo sapiens",
  DataProvider = "hgdownload.cse.ucsc.edu",
  Genome = "hg19"
)
filters(ah) <- human_ucsc_hg19
length(ah)


## ------------------------------------------------------------------------
## Retrieve one resource by name through `$`.
res <- ah$goldenpath.hg19.database.oreganno_0.0.1.RData


## ------------------------------------------------------------------------
library(biomaRt)
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
## GO IDs for the gene whose NCBI gene ID is "1", via BioMart.
ids <- c("1")
## The Ensembl mart names both the attribute and the filter "entrezgene_id";
## the older name "entrezgene" is rejected by current Ensembl marts.
getBM(
  attributes = c("go_id", "entrezgene_id"),
  filters = "entrezgene_id",
  values = ids,
  mart = ensembl
)



## ------------------------------------------------------------------------
library(org.Hs.eg.db)
## The same GO lookup for ENTREZID "1", this time from the org package.
ids <- "1"
select(org.Hs.eg.db, keys = ids, keytype = "ENTREZID", columns = "GO")


## ------------------------------------------------------------------------
## Print the R version, platform and package versions used for this run.
sessionInfo()


