# Session setup: report the working directory, run Sweave on tut11.Rnw, then
# attach the annotation packages used below and browse their help pages.
getwd()
Sweave("tut11.Rnw")
library(illuminaHumanv1.db)
# Help lookups for the illuminaHumanv1.db package and two of its objects.
help(illuminaHumanv1SYMBOL)
package?illuminaHumanv1.db
help("illuminaHumanv1.db-package")
illuminaHumanv1()
?illuminaHumanv1ARRAYADDRESS
help(package=illuminaHumanv1.db)
sessionInfo()
# Same kind of browsing for org.Hs.eg.db.
help(package=org.Hs.eg.db)
library(org.Hs.eg.db)
?org.Hs.eg
?org.Hs.egSYMBOL
?org.Hs.egCHRLOC
# Load the sqlite/hg18.txdb.sqlite file shipped in the ggtut package into
# hg18.txdb and explore it with transcripts() / transcriptsBy().
# NOTE(review): the first loadFeatures() call comes before
# library(GenomicFeatures) and is repeated verbatim right after it; presumably
# the first attempt failed because the package was not attached yet. Confirm.
loadFeatures(system.file("sqlite/hg18.txdb.sqlite", package="ggtut")) -> hg18.txdb
library(GenomicFeatures)
loadFeatures(system.file("sqlite/hg18.txdb.sqlite", package="ggtut")) -> hg18.txdb
hg18.txdb
# Bare symbol, not a call: evaluates whatever `genes` is bound to.
genes
?transcripts
?id2name
?transcripts
?transcriptsBy
hg18.txdb
# First use of `tx`: transcripts grouped by "gene".
tx = transcriptsBy(hg18.txdb, "gene")
tx
?transcriptsBy
class(hg18.txdb)
class?TranscriptDb
table(seqnames(hg18.txdb))
class?TranscriptDb
# `tx` is overwritten here with the result of transcripts().
tx = transcripts(hg18.txdb)
tx
# Keep only the entries whose seqnames equal "chr17".
tx[seqnames(tx) == "chr17"]
t17 = tx[seqnames(tx) == "chr17"]
class(t17)
class?TranscriptDb
# Grouped-by-gene version, kept under a separate name (txg) from here on.
txg = transcriptsBy(hg18.txdb, "gene")
txg
seqnames(txg[[1]])
# Iterative attempts at a predicate that tests whether the first seqname of an
# element of txg is "chr17". `is17` is redefined three times; `is_17` is the
# final slot-based variant.
# Attempt 1: compare values(seqnames(x))[1] with "chr17".
is17 = function(x) values(seqnames(x))[1] == "chr17"
kp  = sapply(txg, is17)
kp  = sapply(txg[1:4], is17)
kp
# Attempt 2: same expression without the comparison, to inspect what it returns.
is17 = function(x) values(seqnames(x))[1] 
is17(txg[[1]])
seqnames(txg[[1]])
values(seqnames(txg[[1]]))
# NOTE(review): `value(` here differs from `values(` on the previous line;
# possibly a typo or a trial of another accessor. Confirm.
value(seqnames(txg[[1]]))
as(txg[[1]], "character")
txg[[1]]
as(seqnames(txg[[1]]), "character")
# Attempt 3: coerce seqnames to character before comparing.
is17 = function(x) as(seqnames(x),"character")[1] == "chr17"
kp  = sapply(txg[1:6], is17)
kp
kp  = sapply(txg, is17)
# Re-run on progressively larger slices of txg and tabulate the results.
kp  = sapply(txg[1:100], is17)
table(kp)
kp  = sapply(txg[1:1000], is17)
table(kp)
# Inspect the object's slots directly with `@`.
txg[[1]]
txg[[1]]@seqinfo
txg[[1]]@seqnames
txg[[1]]@seqnames@values
txg[[1]]@seqnames@values == "chr19"
# Slot-based variant: reads x@seqnames@values directly instead of going
# through the seqnames() accessor and coercion.
is_17 = function(x) x@seqnames@values[1]=="chr17"
kp  = sapply(txg[1:1000], is_17)
table(kp)
# Time the slot-based predicate over all of txg.
unix.time(kp  <- sapply(txg, is_17))
# Look at rdapply and check whether the objects in hand are "RangedData".
?rdapply
# NOTE(review): the two indented lines below use `rd`, which is not defined
# anywhere in the visible history; they look pasted from a help example. Confirm.
 countrows <- function(rd) nrow(rd)
       params <- RDApplyParams(rd, countrows)
hg18.txdb
# Class checks against "RangedData" for the transcript db and for tx.
is(hg18.txdb,"RangedData")
class(tx)
is(tx, "RangedData")
# Build one interval per gene on chromosome 17 (gint17), spanning from the
# smallest transcript start to the largest transcript end of that gene.
txbg = transcriptsBy(hg18.txdb, "gene")
library(org.Hs.eg.db)
# Identifiers stored under key "17" in the reversed org.Hs.egCHR map.
g17 = get("17", revmap(org.Hs.egCHR))
length(g17)
# First try indexes txbg by every id in g17; the second try keeps only the
# ids that are also names of txbg.
txbg17 = txbg[g17]
txbg17 = txbg[intersect(names(txbg),g17)]
txbg17
# NOTE(review): `txbg18` is not defined anywhere in the visible history; the
# next line repeats the call with txbg17, so this looks like a typo.
table(sapply(txbg18,length))
table(sapply(txbg17,length))
# Per-gene minimum start and maximum end.
lows = sapply(txbg17, function(x)min(start(x)))
his = sapply(txbg17, function(x)max(end(x)))
all(lows<his)
lows[1:10]
# One range per gene, all labelled "chr17", named by the names of `lows`.
gint17 = GRanges(seqnames="chr17", IRanges(lows,his))
names(gint17) = names(lows)
gint17
# Write the command history so far to geteglims.hist.txt.
savehistory(file="geteglims.hist.txt")
# Map the probes managed by f1 to the values in illuminaHumanv1ENTREZID and
# pull the matching gene intervals from gint17.
# NOTE(review): `f1` is not defined in the visible history.
# The first line binds the function object itself (no call); the second line
# calls it on f1.
pn = probesManaged
pn = probesManaged(f1)
pn.eg = mget(pn, illuminaHumanv1ENTREZID)
# How many ids each probe maps to.
table(sapply(pn.eg,length))
f1
pn.eg = unlist(pn.eg)
pn.eg[1:5]
# Three successive attempts at subsetting gint17 by the mapped ids: raw ids,
# then with NAs dropped, then restricted to ids that are names of gint17.
pn.ranges = gint17[pn.eg]
sum(is.na(pn.eg))
pn.ranges = gint17[na.omit(pn.eg)]
pn.ranges = gint17[intersect(names(gint17),na.omit(pn.eg))]
pn.ranges
# Re-run of the probe -> gene -> range pipeline from the top, ending in
# g17rngs: one range per gene with a `probeid` metadata column.
pn = probesManaged(f1)
library(illuminaHumanv1.db)
# Probe -> id lookup, flattened, with NA entries dropped.
pn.eg = unlist(mget(pn, illuminaHumanv1ENTREZID))
pn.eg = na.omit(pn.eg)
# NOTE(review): in the first call `package="ggtut"` sits outside
# system.file(), so it is passed to loadFeatures() instead; the second call
# moves it inside system.file().
txdb = loadFeatures(system.file("sqlite/hg18.txdb.sqlite"), package="ggtut")
txdb = loadFeatures(system.file("sqlite/hg18.txdb.sqlite", package="ggtut"))
txg = transcriptsBy(txdb, "gene")
# Keep the genes of txg whose names appear among the probe-mapped ids.
# NOTE(review): the "17" in the name is not enforced by any chromosome filter
# in this block. Verify against the contents of f1.
txg17 = txg[ intersect(names(txg), pn.eg) ]
# Several ways of flattening / viewing txg17.
c(txg17)
unlist(txg17)
txg17
unlist(txg17,USE.NAMES=FALSE)
unlist(txg17)
is(txg17[[1]], "RangedData")
# extents(): c(minimum start, maximum end) of a set of ranges.
extents = function(x) c(min(start(x)),max(end(x)))
# One row per gene, columns = (min start, max end).
ss = t(sapply(txg17,extents))
ss[1:10,]
pn.eg[1:10]
# eg.pn is the inverse lookup of pn.eg: its values are names(pn.eg) and its
# names are the values of pn.eg.
eg.pn = names(pn.eg)
names(eg.pn) = pn.eg
eg.pn[1:10]
ss = t(sapply(txg17,extents))
# Ranges labelled "chr17" built from the two columns of ss, with the probe id
# looked up through eg.pn by the row names of ss.
g17rngs = GRanges(seqnames="chr17", IRanges(ss[,1], ss[,2]), probeid=eg.pn[rownames(ss)])
names(g17rngs) = rownames(ss)
g17rngs
# Spot checks: look up individual keys in the annotation maps and compare
# them with the transcript-derived objects built earlier.
get("GI_4885638-S", illuminaHumanv1CHRLOC)
get("10040", org.Hs.egCHRLOC)
objects()
txbg17[["10040"]]
# Reverse lookups of two keys in org.Hs.egUCSCKG.
get("uc010dbz.1", revmap(org.Hs.egUCSCKG))
get("uc010dca.1", revmap(org.Hs.egUCSCKG))
g17rngs
objects()
# Experiments in selecting the elements of snpgr17 that relate to the gene
# ranges in g17rngs after adding 50000 (or 5000) to those ranges.
# NOTE(review): `snpgr17` is not defined in the visible history.
# match()-based attempts: raw result, then with nomatch=0, then dropping the
# zero entries before indexing.
snpgr17[ match(snpgr17, g17rngs+50000) ]
mm = match(snpgr17, g17rngs+50000)
summary(mm)
mm = match(snpgr17, g17rngs+50000, nomatch=0)
snpgr17[mm]
snpgr17[mm[mm>0]]
# %in%-based attempts.
# NOTE(review): without parentheses, `%in%` binds tighter than `+`, so this
# line evaluates (snpgr17 %in% g17rngs) + 50000; the parenthesised form two
# lines below applies the +50000 to g17rngs first.
snpgr17[ which(snpgr17 %in%  g17rngs+50000) ]
snpgr17 %in%  g17rngs
snpgr17[which(snpgr17 %in%  (g17rngs+50000))]
snpgr17
snpgr17[which(snpgr17 %in%  (g17rngs+5000))]
g17rngs
# Per-gene SNP names for the first five genes: first by iterating over the
# ranges object itself, then by iterating over indices 1:5.
lapply(g17rngs[1:5], function(x) names(snpgr17)[snpgr17 %in% (x+50000)])
lapply(1:5, function(x) names(snpgr17)[snpgr17 %in% (g17rngs[x]+50000)])
lapply(1:5, function(x) names(snpgr17)[snpgr17 %in% (g17rngs[x]+5000)])
# Write the command history so far to grablocal.hist.txt.
savehistory(file="grablocal.hist.txt")
# For every gene range in g17rngs, collect the names of the SNPs in snpgr17
# that are %in% that range after adding 5000 to it.
# seq_along() replaces 1:length(), which would iterate over c(1, 0) if
# g17rngs were empty.
catchSNP5k <- lapply(seq_along(g17rngs), function(i) {
  names(snpgr17)[snpgr17 %in% (g17rngs[i] + 5000)]
})
# Distribution of the number of SNPs caught per gene. vapply() pins the
# result to an integer vector even when catchSNP5k is empty.
summary(vapply(catchSNP5k, length, integer(1)))
# Index of the gene with the most SNPs.
which.max(vapply(catchSNP5k, length, integer(1)))
# NOTE(review): 301 is hard-coded; presumably it is the index reported by
# which.max() above in the original session. Confirm before reuse.
g17rngs[301]
# The ten largest per-gene SNP counts.
sort(vapply(catchSNP5k, length, integer(1)), decreasing = TRUE)[1:10]
# Look up key "5831" in the symbol and location maps and in txg.
get("5831", org.Hs.egSYMBOL)
get("5831", org.Hs.egCHRLOC)
txg["5831"]
# Rebuild the per-gene ranges using only the transcripts whose seqname is
# exactly "chr17" (result: g17rngsnr). The same few commands are retried
# several times while the NA handling and the naming are worked out.
# extents(): restrict x to seqnames == "chr17", then return
# c(minimum start, maximum end) of what is left.
extents = function(x) {y = x[seqnames(x)=="chr17"]; c(min(start(y)),max(end(y)))}  # watch for random
ssnr = t(sapply(txg17,extents))
g17rngsnr = GRanges(seqnames="chr17", IRanges(ssnr[,1], ssnr[,2]), probeid=eg.pn[rownames(ssnr)])
extents = function(x) {y = x[seqnames(x)=="chr17"]; c(min(start(y)),max(end(y)))}  # watch for random
# From here on ssnr is a list (lapply), with each call wrapped in try().
ssnr = lapply( txg17, function(z) try(extents(z)) )
ssnr[1:10]
# `firsts`: first element of each result, or NA when the test fails. The
# test alternates between is.numeric() and is.finite() across the retries.
firsts = sapply(ssnr, function(x) {if(is.numeric(x[1])) return(x[1]); NA})
which(is.na(firsts))
summary(firsts)
firsts = sapply(ssnr, function(x) {if(is.finite(x[1])) return(x[1]); NA})
summary(firsts)
which(is.na(firsts))
txg[["348262"]]
# Drop the genes whose `firsts` entry is NA, then recompute firsts / lasts.
if (any(is.na(firsts))) ssnr = ssnr[-which(is.na(firsts))]
firsts = sapply(ssnr, function(x) {if(is.numeric(x[1])) return(x[1]); NA})
lasts = sapply(ssnr, function(x) {if(is.numeric(x[2])) return(x[2]); NA})
# NOTE(review): ssnr is a list at this point, yet probeid is looked up with
# rownames(ssnr); this is repeated on each retry until the final construction
# switches to names(ssnr).
g17rngsnr = GRanges(seqnames="chr17", IRanges(firsts,lasts), probeid=eg.pn[rownames(ssnr)])
# Retry from the extents() definition (is.numeric filter).
extents = function(x) {y = x[seqnames(x)=="chr17"]; c(min(start(y)),max(end(y)))}  # watch for random
ssnr = lapply( txg17, function(z) try(extents(z)) )
firsts = sapply(ssnr, function(x) {if(is.numeric(x[1])) return(x[1]); NA})
if (any(is.na(firsts))) ssnr = ssnr[-which(is.na(firsts))]
firsts = sapply(ssnr, function(x) {if(is.numeric(x[1])) return(x[1]); NA})
lasts = sapply(ssnr, function(x) {if(is.numeric(x[2])) return(x[2]); NA})
g17rngsnr = GRanges(seqnames="chr17", IRanges(firsts,lasts), probeid=eg.pn[rownames(ssnr)])
summary(firsts)
# Retry with the is.finite filter applied before dropping.
firsts = sapply(ssnr, function(x) {if(is.finite(x[1])) return(x[1]); NA})
if (any(is.na(firsts))) ssnr = ssnr[-which(is.na(firsts))]
firsts = sapply(ssnr, function(x) {if(is.numeric(x[1])) return(x[1]); NA})
lasts = sapply(ssnr, function(x) {if(is.numeric(x[2])) return(x[2]); NA})
g17rngsnr = GRanges(seqnames="chr17", IRanges(firsts,lasts), probeid=eg.pn[rownames(ssnr)])
# Final pass: recompute ssnr from txg17, filter with is.finite, rebuild.
ssnr = lapply( txg17, function(z) try(extents(z)) )
firsts = sapply(ssnr, function(x) {if(is.finite(x[1])) return(x[1]); NA})
if (any(is.na(firsts))) ssnr = ssnr[-which(is.na(firsts))]
firsts = sapply(ssnr, function(x) {if(is.numeric(x[1])) return(x[1]); NA})
lasts = sapply(ssnr, function(x) {if(is.numeric(x[2])) return(x[2]); NA})
g17rngsnr = GRanges(seqnames="chr17", IRanges(firsts,lasts), probeid=eg.pn[rownames(ssnr)])
summary(firsts)
summary(lasts)
# Check how many row names ssnr actually has, then switch to names(ssnr) for
# both the probeid lookup and the range names.
length(rownames(ssnr))
g17rngsnr = GRanges(seqnames="chr17", IRanges(firsts,lasts), probeid=eg.pn[names(ssnr)])
names(g17rngsnr) = names(ssnr)
g17rngsnr
summary(width(g17rngsnr))
# For each range in g17rngsnr, list the names of the SNPs in snpgr17 that are
# %in% the range after adding 50000 to it, then use those lists to index f1.
# Trial runs on the first five ranges (by element, then by index).
lapply(g17rngsnr[1:5], function(x) names(snpgr17)[snpgr17 %in% (x+50000)])
lapply(1:5, function(x) names(snpgr17)[snpgr17 %in% (g17rngsnr[x]+50000)])
# Full run, stored in OO.
lapply(1:length(g17rngsnr), function(x) names(snpgr17)[snpgr17 %in% (g17rngsnr[x]+50000)]) -> OO
summary(sapply(OO,length))
OO[1:4]
length(OO)
g17rngsnr
# Same computation again, stored as to50k and named by the `probeid`
# metadata column of g17rngsnr.
to50k = lapply(1:length(g17rngsnr), function(x) names(snpgr17)[snpgr17 %in% (g17rngsnr[x]+50000)]) 
names(to50k) = elementMetadata(g17rngsnr)$probeid
to50k[1]
# Index f1 by rsid(...) and probeId(...) for the first probe. Successive
# attempts: to50k[1] (a list), to50k[[1]] (its contents), then the contents
# restricted to the SNPs reported by snpsManaged(f1).
f1[rsid(to50k[1]), probeId(names(to50k)[1])]
f1[rsid(to50k[[1]]), probeId(names(to50k)[1])]
f1[rsid(intersect(snpsManaged(f1),to50k[[1]])), probeId(names(to50k)[1])]
# Write the command history so far to getscoresALL.hist.txt.
savehistory(file="getscoresALL.hist.txt")
# Collect, for every probe in to50k, the f1 entries for the SNPs listed for
# that probe (restricted to the SNPs reported by snpsManaged()). The result
# is reassigned to allsco50k several times.
# First full attempt, without try().
allsco50k = lapply(1:length(to50k), function(x) f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])]
)
length(to50k)
# Smaller index ranges (1:10, 1:1, 1:2, 1:5); presumably narrowing down a
# failing element. Confirm.
allsco50k = lapply(1:10, function(x) f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])])
length(names(to50k))
allsco50k = lapply(1:1, function(x) f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])])
allsco50k = lapply(1:2, function(x) f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])])
allsco50k = lapply(1:5, function(x) f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])])
# Full run again, each lookup wrapped in try() so one failure does not abort
# the whole lapply.
allsco50k = lapply(1:length(to50k), function(x) try(f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])])
)
# Size checks: number of values retrieved vs. number of SNP names listed.
length(as.numeric(unlist(allsco50k)))
sum(sapply(OO,length))
sum(sapply(to50k,length))
sum(sapply(to50k,function(x)length(intersect(x, snpsManaged(f1))))
)
objects()
# NOTE(review): `permf1` is not defined in the visible history; presumably a
# permuted counterpart of f1. Confirm.
permf1
# Same extraction against permf1; allsco50k is overwritten with the result.
allsco50k = lapply(1:length(to50k), function(x) try(permf1[rsid(intersect(snpsManaged(permf1),to50k[[x]])), probeId(names(to50k)[x])])
)
# Upper quantiles of the permf1-derived values, next to upper-tail
# chi-square (1 df) probabilities at 6.464 and 7.5.
quantile(as.numeric(unlist(allsco50k)), .99, na.rm=TRUE)
1-pchisq(6.464,1)
quantile(as.numeric(unlist(allsco50k)), .995, na.rm=TRUE)
1-pchisq(7.5,1)
quantile(as.numeric(unlist(allsco50k)), .999, na.rm=TRUE)
# Restore allsco50k to the f1-based values.
allsco50k = lapply(1:length(to50k), function(x) try(f1[rsid(intersect(snpsManaged(f1),to50k[[x]])), probeId(names(to50k)[x])])
)
# Keep only the values in allsco50k that exceed 6.464 and flatten them into
# one named numeric vector, ALLSC.
# NOTE(review): the first line reads `fallsco` before any visible definition
# of it; the next line repeats the call on allsco50k instead.
fallsco = lapply(fallsco, function(x) try(x[x>6.464]))
fallsco = lapply(allsco50k, function(x) try(x[x>6.464]))
# Next attempts take x[[1]] before filtering.
fallsco = lapply(allsco50k, function(x) try(x[[1]][x[[1]]>6.464]))
fallsco[1:2]
allsco50k[1:2]
fallsco[1:20]
# Row-wise filter (note the trailing comma), then the same with drop=FALSE
# so a single surviving row is not collapsed.
fallsco = lapply(allsco50k, function(x) try(x[[1]][x[[1]]>6.464,]))
fallsco
fallsco = lapply(allsco50k, function(x) try(x[[1]][x[[1]]>6.464,,drop=FALSE]))
fallsco
allsc = unlist(fallsco)
allsc[1:10]
class(fallsco[[468]])
dim(fallsco[[468]])
# Replace empty results with NULL. The second version adds a guard that
# skips elements inheriting from "try-error".
allsc = lapply(fallsco, function(x) if (nrow(x)>0) x else NULL)
fallsco[[4]]
allsc = lapply(fallsco, function(x) if (!inherits(x, "try-error") && nrow(x)>0) x else NULL)
allsc[1:5]
allsc[1:15]
# Values as plain numerics (allsc) and the matching row names (alln).
allsc = lapply(fallsco, function(x) if (!inherits(x, "try-error") && nrow(x)>0) as.numeric(x) else NULL)
allsc[1:15]
alln = lapply(fallsco, function(x) if (!inherits(x, "try-error") && nrow(x)>0) rownames(x) else NULL)
# Flatten both lists and use the row names as the names of ALLSC.
ALLSC = unlist(allsc)
names(ALLSC) = unlist(alln)
ALLSC[1:10]
# Attach the values in ALLSC to the matching entries of snpgr17, export the
# result, and check for duplicated entries.
snpgr17
# Select the entries of snpgr17 by the names of ALLSC.
snpgr17[ names(ALLSC) ]
in50k = snpgr17[ names(ALLSC) ]
# Store the values as a `score` metadata column.
elementMetadata(in50k)$score = as.numeric(ALLSC)
in50k
# Write in50k to ~/in50k.wig.
# NOTE(review): export() is not defined in the visible history; presumably it
# comes from a package attached elsewhere. Confirm.
export(in50k, "~/in50k.wig")
table(width(in50k))
# Count repeated start positions and repeated names, then show the first ten
# values whose name has already occurred.
sum(duplicated(start(in50k)))
sum(duplicated(names(ALLSC)))
ALLSC[duplicated(names(ALLSC))][1:10]
# Write the command history so far to getTo50kDUPLICATEDS.hist.txt.
savehistory(file="getTo50kDUPLICATEDS.hist.txt")
