This is the R Markdown for Figure 4e, which consists of 1 part.
library(foreach)
library(iClusterPlus)
library(GenomicRanges)
library(gplots)
library(lattice)
library(cluster)
library(genefilter)
library(data.table)
library(ggplot2)
library(grid)
#define function
rbind_gtable <- function(...){
gtl = lapply(list(...), ggplotGrob)
bind2 <- function (x, y)
{
stopifnot(ncol(x) == ncol(y))
if (nrow(x) == 0)
return(y)
if (nrow(y) == 0)
return(x)
y$layout$t <- y$layout$t + nrow(x)
y$layout$b <- y$layout$b + nrow(x)
x$layout <- rbind(x$layout, y$layout)
x$heights <- gtable:::insert.unit(x$heights, y$heights)
x$rownames <- c(x$rownames, y$rownames)
x$widths <- grid::unit.pmax(x$widths, y$widths)
x$grobs <- append(x$grobs, y$grobs)
x
}
Reduce(bind2, gtl)
}
theme1 = function(...) theme( panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.margin = unit(0,"null"),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length = unit(0,"null"),
axis.ticks.margin = unit(0,"null"),...)
get.clinical.scale = function(cl_col,cl_lab) {
# Set1 colors
#colors = c("#f0f0f0", "#636363", "#f0f0f0", "#636363", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
# use Perou's intrinsic subtype colors instead
colors = cl_col
color.names = cl_lab
names(colors) = color.names
clinical.color.scale = scale_fill_manual(name="mutation", values=colors)
return(clinical.color.scale)
}
#read data
output=alist()
files="cv.fit.k_lambda005_020_100_080_20180819_all_no_mutation.Rdata"
for(i in 1:length(files)){
load(files[i])
output[[i]]=cv.fit
}
nLambda = nrow(output[[1]]$lambda)
nK = length(output)
BIC = getBIC(output)
devR = getDevR(output)
minBICid = apply(BIC,2,which.min)
devRatMinBIC = rep(NA,nK)
for(i in 1:nK){
devRatMinBIC[i] = devR[minBICid[i],i]
}
clusters=getClusters(output)
colnames(clusters)=paste("K=",2:(length(output)+1),sep="")
k=1
best.cluster=clusters[,k]
best.fit=output[[k]]$fit[[which.min(BIC[,k])]]
#cluster result
sp_file=read.table("samples_208.xls",header = T,sep="\t",stringsAsFactors = F)
load(paste("clusterRdata_20180819_CNV_merge_RNA1540_samples","_",".Rdata",sep=""))
cluster=read.table("Icluster4_sp.xls",sep="\t",header=T,stringsAsFactors=F)
rownames(cluster)=cluster$Sample
cluster=cluster[!is.element(cluster$Sample,c("T37","T274")),]
cl_rna=read.table("Rna_cluster.xls",sep="\t",header=F,stringsAsFactors=F,row.names=1)
cl_dmr=read.table("dmr_hypo_hyper_rpmm.xls",sep="\t",header=T,stringsAsFactors=F,row.names=1)
cl_cnv=read.table("CNV_cluster_208.xls",sep="\t",header=T,stringsAsFactors=F,row.names=1)
cl_miRNA=read.table("miRNA_cluster_sp_98.xls",sep="\t",header = T,stringsAsFactors = F)
mt_208=read.table("Mutation_summary_Arv.xls",sep="\t",header=T,stringsAsFactors=F,check.name=F,row.names=1)
cl_dt=mt_208[,c("Pretreatment drug","PSA","GS","Pathology T","Pathology N","Clinical M","Age")]
cl_dt$Age=ifelse(cl_dt$Age<=65,"50~65",ifelse(cl_dt$Age>65&cl_dt$Age<=70,"66~70",ifelse(cl_dt$Age>70&cl_dt$Age<=75,"71~75","76~")))
cl_dt$PSA=ifelse(cl_dt$PSA<4,"<4",ifelse(cl_dt$PSA>=4&cl_dt$PSA<=20,"4~20",">20"))
cl_dt$PSA=paste("PSA",cl_dt$PSA,sep=":")
cl_dt$`Pretreatment drug`[is.na(cl_dt$`Pretreatment drug`)]="NO"
colnames(cl_dt)[c(1,4:6)]=c("Pre-Treat","pT","pN","cM")
cl_dt$GS=paste("GS",cl_dt$GS,sep=":")
cl_dt$GS[cl_dt$GS=="GS:NA"]=NA
cl_dt$Age=paste("Age",cl_dt$Age,sep=":")
cl_dt$iCluster=cluster[rownames(cl_dt),2]
cl_dt$iCluster=paste("Cluster",cl_dt$iCluster,sep="")
cl_dt$`Pre-Treat`=paste("Pre-Treat",cl_dt$`Pre-Treat`,sep=":")
cl_dmr[,1]=as.numeric(as.factor(cl_dmr[,1]))
cl_dt$CNV=cl_cnv[rownames(cl_dt),]
cl_dt$CNV=paste("CNV",cl_dt$CNV,sep=":")
cl_dt$CNV=ifelse(cl_dt$CNV=="CNV:Quiet","Low",ifelse(cl_dt$CNV=="CNV:Some_cnv","Intermediate","High"))
cl_dt$mRNA=NA
cl_dt[rownames(cl_rna),"mRNA"]=paste("RNA",cl_rna[,1],sep="")
cl_dt$DMR=NA
cl_dt[rownames(cl_dmr),"DMR"]=paste("DMR",cl_dmr[,1],sep="")
cl_dt$miRNA=NA
cl_dt[rownames(cl_miRNA),"miRNA"]=paste("miRNA",cl_miRNA[,1],sep="")
sp=cluster[,1]
sorder=order(cluster[,2])
sp=sp[sorder]
tclin=cl_dt[sp,c(8,1:7,9:12)]
#cluster divider
a=cluster$icluster[sorder]
l=length(a)
brkpoints=which(a[2:l]!=a[1:(l-1)])
cluster.start=c(1,brkpoints+1)
#set colors
cl_col =c(c("#E69F00","#0072B2","forestgreen","red"),RColorBrewer::brewer.pal(9, name = "GnBu")[c(6:9)],c("#f1eef6","#d7b5d8","#df65b0","#ce1256","#9B808F50"),c("#bcbddc","#756bb1","#9B808F50"),c("#f1eef6","#d0d1e6","#a6bddb","#74a9cf","#2b8cbe","#045a8d"),c("#fee5d9","#fcae91","#fd6a4a","#de2d26","#a50f15"),c("#edf8b1","#7fcdbb","#2c7fb8"), c("#009E73","#D55E00","#56B4E9"),RColorBrewer::brewer.pal(8, name = "PRGn")[3:1],RColorBrewer::brewer.pal(11, name = "Spectral")[4:9],"#9B808F50",c("#377eb8","#4daf4a","#984ea3","#ff7f00","#ffff33","wheat2","#a65628"),RColorBrewer::brewer.pal(8, name = "Dark2")[5:8])
cl_lab=c(paste("Cluster",1:4,sep=""),paste("Age",c("50~65","66~70","71~75","76~"),sep=":"),"M0","M1a","M1b","M1c","Mx","pN0","pN1","pNx","pT2a","pT2b","pT2c","pT3a","pT3b","pT4","GS:10","GS:6","GS:7","GS:8","GS:9",c("PSA:<4","PSA:4~20","PSA:>20"),c("High","Low","Intermediate"),paste("RNA",1:3,sep=""),paste("DMR",1:6,sep=""),"NA","Pre-Treat:bicalutamide","Pre-Treat:bicalutamide+Finasteride","Pre-Treat:Finasteride","Pre-Treat:Flutamide","Pre-Treat:Leuprorelin+bicalutamide","Pre-Treat:NO","Pre-Treat:Zoladex+bicalutamide",paste("miRNA",1:4,sep=""))
options(warn = -1)
plots=list()
#clinc
image.data=tclin[sp,]
image.data2=as.matrix(rev(as.data.frame(image.data))) #reverse the rows so the image will be top-down ordering
sample_level = row.names(image.data)
gene_level = colnames(image.data2)
image.data_m = melt(image.data2)
colnames(image.data_m) = c("sample","gene","value")
image.data_m$sample = factor(image.data_m$sample, levels = sample_level)
image.data_m$gene = factor(image.data_m$gene, levels = gene_level)
image.data_m$value = as.factor(image.data_m$value)
p = ggplot(image.data_m)
p = p + geom_tile(aes(x=sample, y=gene, fill=value), color=NA)#, linetype="blank")
p = p + get.clinical.scale(cl_col,cl_lab)
p = p + theme_bw(base_size = 10)
p = p + theme1() #+ coord_equal()
p = p + ylab("Clinical") + theme(axis.title.y = element_text(size=10, angle=90))
p=p + theme(axis.text.y = element_text(colour="black", size=10))+guides(fill = guide_legend(keywidth = .7, keyheight = .7, ncol= 4, title = '',byrow = F))
plots[[1]] = p+ geom_vline(xintercept = cluster.start[-1] - 0.5)
#CNV
image.data=tCNV[sp,]
chr=unlist(strsplit(colnames(tCNV),"\\."))
chr=chr[seq(1,length(chr),by=2)]
chr=gsub("chr","",chr)
chr=as.numeric(chr)
len=length(chr)
chrom.ends <- rep(NA,length(table(chr)));
d=1
for(r in unique(chr))
{
chrom.ends[d] <-max(which(chr==r))
d=d+1
}
chrom.starts <-c(1,chrom.ends[-length(table(chr))]+1)
chrom.mids <- (chrom.starts+chrom.ends)/2
image.data2=as.matrix(rev(as.data.frame(image.data))) #reverse the rows so the image will be top-down ordering
sample_level = row.names(image.data)
gene_level = colnames(image.data2)
image.data_m = melt(image.data2)
colnames(image.data_m) = c("sample","gene","value")
image.data_m$sample = factor(image.data_m$sample, levels = sample_level)
image.data_m$gene = factor(image.data_m$gene, levels = gene_level)
max_bound = quantile(image.data_m$value,prob=0.99,na.rm=T)
min_bound = quantile(image.data_m$value,prob=0.01,na.rm=T)
image.data_m$truncated_value = image.data_m$value
image.data_m[image.data_m$truncated_value >= -min_bound,]$truncated_value = -min_bound
image.data_m[image.data_m$truncated_value <= min_bound,]$truncated_value = min_bound
p = ggplot(image.data_m)
p = p + geom_tile(aes(x=sample, y=gene, fill= truncated_value), linetype="blank") + scale_fill_gradientn(name= "Levels", na.value=NA, colours=bluered(256), limit=c(min_bound,-min_bound))
p = p + theme_bw(base_size = 10)
p = p + theme1() #+ coord_equal()
p = p + ylab("CNV") + theme(axis.title.y = element_text(size=10, angle=90))
p = p + geom_vline(xintercept = cluster.start[-1] - 0.5)
plots[[2]] = p + geom_hline(yintercept = len-chrom.starts,color="gray")
#RNA
image.rna=scale(t(RNA.s))
rowsum=apply(abs(best.fit$beta[[2]]),1, sum)
upper=0
image.data=image.rna[sp,which(rowsum> upper)]
diss=1-cor(image.data,use="na.or.complete");
hclust.fit=hclust(as.dist(diss));
#hclust.fit=hclust(dist(t(image.data)))
gorder=hclust.fit$order
image.data=image.data[,gorder]
image.data2=as.matrix(rev(as.data.frame(image.data))) #reverse the rows so the image will be top-down ordering
sample_level = row.names(image.data)
gene_level = colnames(image.data2)
image.data_m = melt(image.data2)
colnames(image.data_m) = c("sample","gene","value")
image.data_m$sample = factor(image.data_m$sample, levels = sample_level)
image.data_m$gene = factor(image.data_m$gene, levels = gene_level)
max_bound = 4
min_bound = -4
image.data_m$truncated_value = image.data_m$value
image.data_m[image.data_m$truncated_value >= max_bound,]$truncated_value = max_bound
image.data_m[image.data_m$truncated_value <= min_bound,]$truncated_value = min_bound
p = ggplot(image.data_m)
p = p + geom_tile(aes(x=sample, y=gene, fill= truncated_value), linetype="blank") + scale_fill_gradientn(name= "Levels", na.value=NA, colours=bluered(256), limit=c(min_bound,max_bound))
p = p + theme_bw(base_size = 10)
p = p + theme1() #+ coord_equal()
p = p + ylab("mRNA") + theme(axis.title.y = element_text(size=10, angle=90))
p = p + theme(axis.text.y = element_blank())
plots[[3]] = p + geom_vline(xintercept = cluster.start[-1] - 0.5)
#DMR
image.dmr=tDMR
image.data=image.dmr[sp,]
diss=1-cor(image.data,use="na.or.complete");
hclust.fit=hclust(as.dist(diss));
#hclust.fit=hclust(dist(t(image.data)))
gorder=hclust.fit$order
image.data=image.data[,gorder]
image.data2=as.matrix(rev(as.data.frame(image.data))) #reverse the rows so the image will be top-down ordering
sample_level = row.names(image.data)
gene_level = colnames(image.data2)
image.data_m = melt(image.data2)
colnames(image.data_m) = c("sample","gene","value")
image.data_m$sample = factor(image.data_m$sample, levels = sample_level)
image.data_m$gene = factor(image.data_m$gene, levels = gene_level)
max_bound = 1
min_bound = 0
image.data_m$truncated_value = image.data_m$value
image.data_m[image.data_m$truncated_value >= max_bound,]$truncated_value = max_bound
image.data_m[image.data_m$truncated_value <= min_bound,]$truncated_value = min_bound
p = ggplot(image.data_m)
p = p + geom_tile(aes(x=sample, y=gene, fill= truncated_value), linetype="blank") + scale_fill_gradientn(name= "Levels", na.value=NA, colours=bluered(256), limit=c(min_bound,max_bound))
p = p + theme_bw(base_size = 10)
p = p + theme1() #+ coord_equal()
p = p + ylab("DMR") + theme(axis.title.y = element_text(size=10, angle=90))
p = p + theme(axis.text.y = element_blank())
plots[[4]]= p + geom_vline(xintercept = cluster.start[-1] - 0.5)
gp = do.call(rbind_gtable, plots)
#fn=paste("iCluster_mRNA_CNV_DMR_208.png",sep="")
#png(fn, height=600, width=960)
grid.draw(gp)
#dev.off()
Figure 4e