Seqanswers Leaderboard Ad

**wilson90** · 06-30-2015, 09:56 PM

peaks <- function(series, span=3, ties.method = "first") {
### This peaks function from Petr Pikal https://stat.ethz.ch/pipermail/r-hel...ry/125241.html
if((span <- as.integer(span)) %% 2 != 1) stop("'span' must be odd")
z <- embed(series, span)
s <- span%/%2
v <- max.col(z, ties.method=ties.method) == 1 + s
pad <- rep(FALSE, s)
result <- c(pad, v, pad)
result
}

args=commandArgs(TRUE)

#df <- read.table("./SRR054616.hg19.50k.k50.nobad.varbin.data.txt", header=T)
#dfs <- read.table("./SRR054616.hg19.50k.k50.nobad.varbin.short.txt", header=T)

df <- read.table("hg19.50k.k50.nobad.varbin.data.txt", header=T)
dfs <- read.table("hg19.50k.k50.nobad.varbin.short.txt", header=T)

starts <- c()
ends <- c()
prevEnd <- 0
len <- nrow(dfs)
for (j in 1:len) {
thisStart = prevEnd + 1
thisEnd = thisStart + dfs$num.mark[j] - 1
starts <- c(starts, thisStart)
ends <- c(ends, thisEnd)
prevEnd = thisEnd
}

amat <- matrix(data=0, nrow=1500000, ncol=1)
counter <- 1
for (j in 1

len-1)) {
for (k in (j+1):len) {
N <- round((starts[j] - ends[j] + 1) * (starts[k] - ends[k] + 1)/1000)
D <- abs(2^dfs$seg.mean[j] - 2^dfs$seg.mean[k])
#cat(N, "\t")
#cat(starts[j],"\t")
#cat(ends[j],"\t")
#cat(starts[k],"\t")
#cat(ends[k],"\t")
#cat(N,"\t")
#cat(counter,"\t")
#cat(D,"\t")
#cat(len,"\n")
if (N > 0) {
amat[(counter

counter+N-1)), 1] <- rep.int(D, N)
counter <- counter+N
}
}
}
a3 <- amat[(1:counter),1]
a3.95 <- sort(a3)[round(.95*counter)]
a3d <- density(a3[which(a3 < a3.95)], n=1000)
a3d$x[which(peaks(as.vector(a3d$y), span=3))]
a3d$y[which(peaks(as.vector(a3d$y), span=3))]
cn0 <- a3d$x[which(peaks(as.vector(a3d$y), span=59))][1]
cn1 <- a3d$x[which(peaks(as.vector(a3d$y), span=59))][2]
quantile(a3d$x[which(peaks(as.vector(a3d$y), span=3))],c(0.2,0.4,0.6,0.8,1))
cn0
cn1
df$cn.ratio <- df$lowratio / cn1
df$cn.seg <- df$seg.mean.LOWESS / cn1
df$copy.number <- round(df$cn.seg)

write.table(df, sep="\t", file=paste("sc.varbin.data.copynumber.txt", sep=""), quote=F, row.names=F)

postscript("sc.wg.cn.density.ps", height=400, width=600)
par(mar=c(5.1,4.1,4.1,4.1))
plot(a3d, main="seg.mean difference density")
dev.off()

for (a in 1:24) {
postscript(paste("sc.wg.cn.chr", a, ".ps", sep=""), height=400, width=600)
par(mar=c(5.1,4.1,4.1,4.1))
plot(df$cn.ratio[df$chrom==a], main=paste("SRR054616 chr", a, sep=""), xlab="Bin", ylab="Ratio", col="#CCCCCC")
lines(df$cn.ratio[df$chrom==a], col="#CCCCCC")
points(df$cn.seg[df$chrom==a], col="#0000DD")
lines(df$cn.seg[df$chrom==a], col="#0000DD")
points(df$copy.number[df$chrom==a], col="#DD0000")
lines(df$copy.number[df$chrom==a], col="#DD0000")
dev.off()
}

This is the part where the author tries to estimate copy number. It is kind of him to share his code, but it comes without any comment or explanation.

Can anyone familiar with CNV field kind enough to explain what this R code is doing?

Thank you.

Wilson

Topics	Statistics	Last Post
Evaluating Genome Sequencing for ECMO Patients in the NICU by seqadmin Started by seqadmin, 12-17-2024, 10:28 AM	0 responses 22 views 0 likes	Last Post by seqadmin 12-17-2024, 10:28 AM
New Genetic Toolkit Refines Studies on Gene Function and Disease by seqadmin Started by seqadmin, 12-13-2024, 08:24 AM	0 responses 42 views 0 likes	Last Post by seqadmin 12-13-2024, 08:24 AM
Study Links Brain Mechanism to Emotional Responses in Animals and Humans by seqadmin Started by seqadmin, 12-12-2024, 07:41 AM	0 responses 28 views 0 likes	Last Post by seqadmin 12-12-2024, 07:41 AM
Study Identifies Ribosomal RNA Fingerprints as Early Cancer Biomarkers by seqadmin Started by seqadmin, 12-11-2024, 07:45 AM	0 responses 42 views 0 likes	Last Post by seqadmin 12-11-2024, 07:45 AM

Seqanswers Leaderboard Ad

Announcement

Have anyone tried analysis in Baslan 2012 "Genome-wide CNV analysis of Single-cell"?

Comment

Latest Articles

ad_right_rmr

News