set.seed(13342143)
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] magrittr_1.5 formatR_1.2 tools_3.2.2 htmltools_0.2.6
## [5] yaml_2.1.13 stringi_0.5-5 rmarkdown_0.7 knitr_1.11
## [9] stringr_1.0.0 digest_0.6.8 evaluate_0.7.2
setwd("~/Desktop/Projects/NGS-course/SLDC")
# simulate read counts under a log normal distribution
go.1 <- rlnorm(n = 1e4, meanlog = 3, sdlog = 1)
head(go.1)
## [1] 104.032753 21.115618 13.296879 7.716048 28.856369 33.456710
hist(go.1, col = "grey", main = "", las = 1, breaks = 50)
#Male data
M.exp <- go.1 + rnorm(1e4, mean = 0, sd = 10)
M.exp[M.exp < 0.1] <- 0.1 # we will be using log2, and don't want the world to implode.
#Female data
F.exp <- go.1 + rnorm(1e4, mean = 0, sd = 10)
F.exp[F.exp < 0.1] <- 0.1
# Combine into data.frame
df.2 <- data.frame(exp = go.1, male = M.exp, female = F.exp, chr = as.factor(rep(1:10)))
head(df.2)
## exp male female chr
## 1 104.032753 108.777455 104.35075 1
## 2 21.115618 26.920815 35.79136 2
## 3 13.296879 8.237346 11.17147 3
## 4 7.716048 0.100000 25.14156 4
## 5 28.856369 38.871033 26.13981 5
## 6 33.456710 39.967444 27.40130 6
tail(df.2)
## exp male female chr
## 9995 22.001704 25.884432 35.098484 5
## 9996 12.251902 21.353553 7.301167 6
## 9997 3.623728 0.100000 0.100000 7
## 9998 1.507560 1.495755 23.806271 8
## 9999 14.583080 15.142078 10.117831 9
## 10000 51.379336 46.607079 45.080487 10
str(df.2)
## 'data.frame': 10000 obs. of 4 variables:
## $ exp : num 104.03 21.12 13.3 7.72 28.86 ...
## $ male : num 108.78 26.92 8.24 0.1 38.87 ...
## $ female: num 104.4 35.8 11.2 25.1 26.1 ...
## $ chr : Factor w/ 10 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
plot(M.exp, F.exp, pch = 19, las = 1, col = rgb(0, 0, 0, 0.2))
#### Make your own M:A plot
plot(log2(go.1), log2(M.exp) - log2(F.exp), pch = 19, col = rgb(0, 0, 0, 0.2), ylim = c(-10, 10), xlim = c(0, 10), las = 1, xlab = expression(paste("log Expression")), ylab = expression(paste("log"[2], " Male - log"[2], " Female")))
# plot males count density
plot(density(log2(M.exp)), lwd = 3, main = "")
# overlay female count density
lines(density(log2(F.exp)), lwd = 3, main = "", col = "red", lty = 2)
# overlay the base expression
lines(density(log2(df.2$exp)), lwd = 3, col = "dodgerblue", lty = 4)
par(mfrow = c(1, 2))
# Chromosome 1 is the sex chromosome
boxplot(log2(df.2$male) ~ df.2$chr, las =1, pch = 19, col = c("red", rep("dodgerblue", 9)), ylim = c(-1, 9), main = "male", outline = FALSE)
boxplot(log2(df.2$female) ~ df.2$chr, las =1, pch = 19, col = c("red", rep("dodgerblue", 9)), ylim = c(-1, 9), main = "female", outline = FALSE)
par(mfrow = c(1, 1))
# This is chromosome by chromosome
boxplot((log2(df.2$male) - log2(df.2$female)) ~ df.2$chr, pch = 19, col = c("red", rep("dodgerblue", 9)), las = 1, ylim = c(-3, 3), main = expression(paste("log"[2], "Male - log"[2], "Female")), outline = FALSE)
# Autosomes vs. sex-chromosome
plot(density(log2(df.2$male[df.2$chr != 1]) - log2(df.2$female[df.2$chr != 1])), lwd = 3, las = 1, main = "", xlab = "")
lines(density(log2(df.2$male[df.2$chr == 1]) - log2(df.2$female[df.2$chr == 1])), lwd = 3, col = "red")
df.2$M.z <- df.2$male
df.2$M.z[df.2$chr == 1] <- df.2$male[df.2$chr == 1] * 0.7
df.2$F.z <- df.2$female
df.2$F.z[df.2$chr == 1] <- df.2$female[df.2$chr == 1] * 0.7
head(df.2)
## exp male female chr M.z F.z
## 1 104.032753 108.777455 104.35075 1 76.144219 73.04553
## 2 21.115618 26.920815 35.79136 2 26.920815 35.79136
## 3 13.296879 8.237346 11.17147 3 8.237346 11.17147
## 4 7.716048 0.100000 25.14156 4 0.100000 25.14156
## 5 28.856369 38.871033 26.13981 5 38.871033 26.13981
## 6 33.456710 39.967444 27.40130 6 39.967444 27.40130
par(mfrow = c(1, 2))
boxplot(log2(df.2$M.z) ~ df.2$chr, las =1, pch = 19, col = c("red", rep("dodgerblue", 9)), ylim = c(-1, 9), main = "male", outline = FALSE)
boxplot(log2(df.2$F.z) ~ df.2$chr, las =1, pch = 19, col = c("red", rep("dodgerblue", 9)), ylim = c(-1, 9), main = "female", outline = FALSE)
par(mfrow = c(1, 1))
df.2$trt <- df.2$male
df.2$trt[df.2$chr == 1] <- df.2$male[df.2$chr == 1] * 2
head(df.2)
## exp male female chr M.z F.z trt
## 1 104.032753 108.777455 104.35075 1 76.144219 73.04553 217.554910
## 2 21.115618 26.920815 35.79136 2 26.920815 35.79136 26.920815
## 3 13.296879 8.237346 11.17147 3 8.237346 11.17147 8.237346
## 4 7.716048 0.100000 25.14156 4 0.100000 25.14156 0.100000
## 5 28.856369 38.871033 26.13981 5 38.871033 26.13981 38.871033
## 6 33.456710 39.967444 27.40130 6 39.967444 27.40130 39.967444
par(mfrow = c(1, 2))
boxplot(log2(df.2$trt) ~ df.2$chr, las =1, pch = 19, col = c("red", rep("dodgerblue", 9)), ylim = c(-1, 10), main = "male", outline = FALSE)
boxplot(log2(df.2$F.z) ~ df.2$chr, las =1, pch = 19, col = c("red", rep("dodgerblue", 9)), ylim = c(-1, 10), main = "female", outline = FALSE)
par(mfrow = c(1, 1))
# with density plots for male vs. female
plot(density(log2(df.2$trt[df.2$chr != 1]) - log2(df.2$F.z[df.2$chr != 1])), lwd = 3, las = 1, main = "", xlab = "", ylim = c(0, 0.65))
lines(density(log2(df.2$trt[df.2$chr == 1]) - log2(df.2$F.z[df.2$chr == 1])), lwd = 3, col = "red")
# and with M:F boxplots
boxplot((log2(df.2$trt) - log2(df.2$female)) ~ df.2$chr, pch = 19, col = c("red", rep("dodgerblue", 9)), las = 1, ylim = c(-3, 4), main = expression(paste("log"[2], "Male - log"[2], "Female")), outline = FALSE)
# T <- FALSE
# T
# Bmori.dat <- read.csv("Data/Bmori-data.csv", header = T)
# head(Bmori.dat)
Bmori.dat <- read.csv("Data/Bmori-data.csv", header = TRUE)
head(Bmori.dat)
## X DNA rpkm.f67 rpkm.m68 rpkm.f69 rpkm.m70 Assembly
## 1 1 gene7136 416.36491 358.92354 413.79858 382.33994 ASM15162v1
## 2 2 gene7137 16777.64269 17743.90956 15144.15295 16193.52100 ASM15162v1
## 3 3 gene6997 120.59783 124.01753 123.03134 123.73535 ASM15162v1
## 4 4 gene7279 56.31174 76.32931 85.97140 59.51128 ASM15162v1
## 5 5 gene7168 99.88249 106.29217 98.38808 136.62353 ASM15162v1
## 6 6 gene7023 0.00000 0.00000 0.00000 0.00000 ASM15162v1
## GenBank.Accession.version NCBI.name chr
## 1 DF090316.1 GPS_001612900.1 13
## 2 DF090316.1 GPS_001612900.1 13
## 3 DF090316.1 GPS_001612900.1 13
## 4 DF090316.1 GPS_001612900.1 13
## 5 DF090316.1 GPS_001612900.1 13
## 6 DF090316.1 GPS_001612900.1 13
# TRUE <- FALSE
# TRUE <- F
# TRUE <- "anything"
str(Bmori.dat)
## 'data.frame': 12690 obs. of 10 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ DNA : Factor w/ 12690 levels "gene100","gene1000",..: 9584 9585 9435 9742 9619 9465 9462 9383 9503 9508 ...
## $ rpkm.f67 : num 416.4 16777.6 120.6 56.3 99.9 ...
## $ rpkm.m68 : num 358.9 17743.9 124 76.3 106.3 ...
## $ rpkm.f69 : num 413.8 15144.2 123 86 98.4 ...
## $ rpkm.m70 : num 382.3 16193.5 123.7 59.5 136.6 ...
## $ Assembly : Factor w/ 1 level "ASM15162v1": 1 1 1 1 1 1 1 1 1 1 ...
## $ GenBank.Accession.version: Factor w/ 176 levels "DF090316.1","DF090317.1",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ NCBI.name : Factor w/ 176 levels "GPS_001612571.1",..: 141 141 141 141 141 141 141 141 141 141 ...
## $ chr : int 13 13 13 13 13 13 13 13 13 13 ...
# Make your own M:A plots
M.Mcont_Fcont<- log2(Bmori.dat$rpkm.m68) - log2(Bmori.dat$rpkm.f67)
A.Mcont_Fcont <- log2((Bmori.dat$rpkm.m68 + Bmori.dat$rpkm.f67) / 2)
M.Mcont_Fexp <- log2(Bmori.dat$rpkm.m68) - log2(Bmori.dat$rpkm.f69)
A.Mcont_Fexp <- log2((Bmori.dat$rpkm.m68 + Bmori.dat$rpkm.f69) / 2)
M.Mexp_Fcont <- log2(Bmori.dat$rpkm.m70) - log2(Bmori.dat$rpkm.f67)
A.Mexp_Fcont <- log2((Bmori.dat$rpkm.m70 + Bmori.dat$rpkm.f67) / 2)
M.Mexp_Fexp <- log2(Bmori.dat$rpkm.m70) - log2(Bmori.dat$rpkm.f69)
A.Mexp_Fexp <- log2((Bmori.dat$rpkm.m70 + Bmori.dat$rpkm.f69) / 2)
# make the four pairwise plots
par(mfrow = c(2, 2))
plot(A.Mcont_Fcont, M.Mcont_Fcont, main = "Mcont - Fcont", pch = 19, col = rgb(0, 0, 0, 0.1), cex = 0.8, ylim = c(-4, 6), xlim = c(-5, 17), ylab = expression(paste("log"[2], " expression")), xlab = "")
abline(h = 0, lwd = 3, lty = 1, col = "red")
plot(A.Mcont_Fcont, M.Mcont_Fexp, main = "Mcont - Fexp", pch = 19, col = rgb(0, 0, 0, 0.1), cex = 0.8, ylim = c(-8, 8), xlim = c(-5, 17), xlab = "")
abline(h = 0, lwd = 3, lty = 1, col = "red")
plot(A.Mexp_Fcont, M.Mexp_Fcont, main = "Mexp - Fcont", pch = 19, col = rgb(0, 0, 0, 0.1), cex = 0.8, ylim = c(-6, 8), xlim = c(-5, 17), ylab = expression(paste("log"[2], " expression")), xlab = "Expected count")
abline(h = 0, lwd = 3, lty = 1, col = "red")
plot(A.Mexp_Fexp, M.Mexp_Fexp, main = "Mexp - Fexp", pch = 19, col = rgb(0, 0, 0, 0.1), cex = 0.8, ylim = c(-5, 7), xlim = c(-5, 17), xlab = "Expected count")
abline(h = 0, lwd = 3, lty = 1, col = "red")
par(mfrow = c(1, 1))
par(mfrow = c(1, 2))
boxplot(log2(Bmori.dat$rpkm.m68[Bmori.dat$rpkm.m68 >= 1]) ~ Bmori.dat$chr[Bmori.dat$rpkm.m68 >= 1], pch = 19, notch = TRUE, col = c("red", rep("grey", 27)), na.rm = TRUE, las = 1, ylim = c(-1, 15), main = "Control (male)", ylab = expression(paste("log"[2]," expression (RPKM)")), outline = FALSE)
boxplot(log2(Bmori.dat$rpkm.m70[Bmori.dat$rpkm.m70 >= 1]) ~ Bmori.dat$chr[Bmori.dat$rpkm.m70 >= 1], pch = 19, notch = TRUE, col = c("red", rep("grey", 27)), na.rm = TRUE, las = 1, ylim = c(-1, 15), main = expression(paste(italic("Expr"), " knockdown (male)")), ylab = expression(paste("log"[2]," expression (RPKM)")), outline = FALSE)
par(mfrow = c(1, 1))
plot.ratios <- function(chr = Bmori.dat$chr, minRPKM = 1, sampA, sampB, ...) {
rpkm <- data.frame(chr, sampA, sampB)
filter <- (sampA >= minRPKM & sampB >= minRPKM)
rpkm <- rpkm[filter,]
rpkm$lratio <- log2(rpkm$sampA) - log2(rpkm$sampB)
boxplot(rpkm$lratio ~ rpkm$chr, ...)
}
plot.ratios(sampA = Bmori.dat$rpkm.m70, sampB=Bmori.dat$rpkm.f69, minRPKM = 0.1, main = expression(paste(italic(Expr), " Male vs. Female")), outline = FALSE, col = c("red", rep("grey", 27)), notch = TRUE, ylab = expression(paste("log" [2], "(Male) - log" [2], "(Female)")), las = 1)
#### Clearly the experimental treatment has resulted in the upregulation of the sex chromosome
plot.density.za <- function(chr = Bmori.dat$chr, minRPKM = 1, sampA, sampB, ...) {
rpkm <- data.frame(chr, sampA, sampB)
filter <- (sampA > minRPKM & sampB > minRPKM)
rpkm <- rpkm[filter,]
rpkm$lratio <- log2(rpkm$sampA) - log2(rpkm$sampB)
rpkm$za <- ifelse(rpkm$chr == 1, "Z", "A")
Zvals <- rpkm$lratio[rpkm$za == "Z"]
Avals <- rpkm$lratio[rpkm$za == "A"]
plot(density(Avals), las = 1, ...)
abline(v = median(Avals), lty = 2, lwd = 3)
lines(density(Zvals), col = "red", lwd = 3)
abline(v = median(Zvals), lty = 2, col = "red", lwd = 3)
}
plot.density.za(sampA = Bmori.dat$rpkm.m70, sampB = Bmori.dat$rpkm.f69, minRPKM = 1, main = expression(paste(italic("Masc"), ": Male vs. Female ")), xlim = c(-2,2), lwd = 3, xlab = expression(paste("Log"[2], " (Male:Female)")), ylab = "")
legend("topright", legend = c("Z Chromosome", "Autosomes"), lty = 1, col = c("red", "black"), bty = "n", lwd = 3)
get.za.stats <- function(chr = Bmori.dat$chr, minRPKM = 1, samp, plot.it = FALSE, ...) {
rpkm <- data.frame(chr, samp)
rpkm <- rpkm[samp > minRPKM,]
rpkm$za <- ifelse(rpkm$chr == 1, "Z", "A")
if (plot.it == TRUE) {
boxplot(log2(rpkm$samp) ~ rpkm$za, ...)
}
Zvals <- rpkm$samp[rpkm$za == "Z"]
Avals <- rpkm$samp[rpkm$za == "A"]
Zmean <- round(mean(Zvals), digits = 3)
Zmedian <- round(median(Zvals), digits = 3)
Amean <- round(mean(Avals), digits = 3)
Amedian <- round(median(Avals), digits = 3)
za.mean <- round(Zmean/Amean, digits = 4)
za.median <- round(Zmedian/Amedian, digits = 4)
wilcox <- wilcox.test(Zvals, Avals)
out.stats <- list( "Zmean" = Zmean, "Amean" = Amean, "Z:A_mean"= za.mean, "Zmedian" = Zmedian, "Amedian" = Amedian, "Z:A_median" = za.median, "ZvA_MWU-p" = signif(wilcox$p.value, digits = 4), "n_Zgenes" = length(Zvals), "n_Agenes" = length(Avals)
)
return(out.stats)
}
sapply(Bmori.dat[3:6], FUN = function(x) get.za.stats(samp = x))
## rpkm.f67 rpkm.m68 rpkm.f69 rpkm.m70
## Zmean 206.727 218.62 228.218 316.713
## Amean 418.812 425.255 402.138 416.213
## Z:A_mean 0.4936 0.5141 0.5675 0.7609
## Zmedian 54.129 55.004 61.61 85.748
## Amedian 104.553 101.347 102.622 99.977
## Z:A_median 0.5177 0.5427 0.6004 0.8577
## ZvA_MWU-p 2.189e-10 1.794e-08 1.123e-06 0.04087
## n_Zgenes 408 415 435 443
## n_Agenes 9710 9782 9962 9833