This is the R Markdown for Extended Dat Fig6, which consists of 6 parts.
library(Biostrings)
library(foreach)
load("proteinseq.RData")
load("exon_anno.RData")
load("procodingseq.RData")
snv=read.table("SNV_mtab.xls",header = T,stringsAsFactors = F)
indel=read.table("Indel_tab.xls",header = T,stringsAsFactors = F)
indel_aa=readLines("test_indel.fasta")
ins=indel[nchar(indel$refbase)==1,]
del=indel[nchar(indel$refbase)!=1,]
del2=del[which((nchar(del$refbase)-1)%%3!=0),]
del3=del[which((nchar(del$refbase)-1)%%3==0),]
b=strsplit(procodingseq[1,1],split="")
a=strsplit(proteinseq[1,1],split="")
a[[1]]=a[[1]][-length(a[[1]])]
m_s=(del3$pincoding-1)%%3
m_sj=m_s
m_sj[which(m_sj==0)]=3
del_p=foreach(j=1:nrow(del3)) %do% b[[1]][(del3$pincoding[j]-33-m_s[j]):(del3$pincoding[j]+nchar(del3$refbase[j])+4+6-m_sj[j])]
del_m=foreach(j=1:nrow(del3)) %do% ifelse(del_p[[j]]=="C","G",ifelse(del_p[[j]]=="G","C",ifelse(del_p[[j]]=="T","A","T")))
j=28
aa=strsplit(as.character(translate(DNAString(paste0(del_p[[j]],collapse = "")))),split="")[[1]]
id=paste(paste(del3$proname[j],del3$pincoding[j],sep="_"),paste(del3$refbase[j],del3$varbase[j],sep = ">"),sep=":")
gl=grep(id,indel_aa)
var_aa=strsplit(indel_aa[gl+1],split="")[[1]]
aa_s=trunc((del3$pincoding[j]-1)/3)
aa_p=a[[1]][(aa_s-10):length(a[[1]])]
var_aa_p=var_aa[(aa_s-10):length(var_aa)]
vs=0.3
vl=ceiling(length(aa_p)/55)
vp=rep(".",length((aa_s-10):(length(a[[1]]))))
names(vp)=(aa_s-10):(length(a[[1]]))
vip=which(as.numeric(names(vp))%%10==0)
for(i in vip){
vp[(i-1):(i+1)]=strsplit(names(vp)[i],split="")[[1]]
}
col=c(rep("black",33+m_s[j]),rep("grey",nchar(del3$refbase)[[j]]-1),rep("black",8-m_sj[j]))
par(mar=c(1,3,3,4))
plot(0,0,xlim=c(-9,63),ylim=c(-2,2),type="n",axes=F,xlab="",ylab="",main=paste("Position",del3$pos[j],sep=":"))
lines(c(0,(length(del_p[[j]])+1)),c(2,2))
lines(c(0,(length(del_p[[j]])+1)),c(1.6,1.6))
lines(c(0,(length(del_p[[j]])+1)),c(1.4,1.4))
text(c(-1,-1),c(1.6,2),c("-","+"))
text(c(-2,-2),c(1.4,1.8),c("Amino Acid","DNA"),pos=2,xpd=T)
for(i in 1:length(del_p[[j]]))
{
lines(c(i,i),c(2,1.9))
lines(c(i,i),c(1.7,1.6))
#lines(c(i,i),c(0,0.2))
}
text(1:length(del_p[[j]]),rep(1.85,length(del_p[[j]])),del_p[[j]],col=col,cex=0.7)
text(1:length(del_m[[j]]),rep(1.75,length(del_m[[j]])),del_m[[j]],col=col,cex=0.7)
#text(1:length(del_mr[[j]]),rep(0.3,length(del_mr[[j]])),del_mr[[j]],col=col)
points(seq(2,length(del_p[[j]]),by=3),rep(1.4,length(aa)),pch=16,cex=3,col=c(rep("orange",11),rep("grey",length(aa)-14),rep("orange",3)))
text(seq(2,length(del_p[[j]]),by=3),rep(1.4,length(aa)),aa)
rect(1,0.8,(aa_s+1)/9,1,col="orange",border = NA)
rect((aa_s+1)/9,0.8,(aa_s+length(aa)-4)/9,1,col="gray",border = NA)
rect((aa_s+length(aa)-4)/9,0.8,length(a[[1]])/9,1,col="orangered",border = NA)
rect(1,0.8,length(a[[1]])/9,1)
rect(1,0.4,aa_s/9,0.6,col="orange",border = NA)
rect(aa_s/9,0.4,nchar(indel_aa[gl+1])/9,0.6,col="orangered",border = NA)
rect(1,0.4,nchar(indel_aa[gl+1])/9,0.6)
lines(c(1,1),c(1,1.05))
lines(c(length(a[[1]])/9,length(a[[1]])/9),c(1,1.05))
lines(c((aa_s+1)/9,aa_s/9-0.5),c(1,1.05))
lines(c((aa_s+length(aa)-4)/9,(aa_s+length(aa)-4)/9+0.5),c(1,1.05))
lines(c(1,1),c(0.6,0.65))
lines(rep(nchar(indel_aa[gl+1])/9,2),c(0.6,0.65))
lines(rep((aa_s+1)/9,2),c(0.6,0.65))
text(c(1,aa_s/9-1,(aa_s+length(aa)-4)/9+1,length(a[[1]])/9),rep(1.1,4),c(1,aa_s+1,aa_s+length(aa)-4,length(a[[1]])),cex=0.7)
text(c(1,aa_s/9,nchar(indel_aa[gl+1])/9),rep(0.7,3),c(1,aa_s+1,nchar(indel_aa[gl+1])),cex=0.7)
text(1:11,rep(vs,11),rep("*",11),cex=0.8)
for(k in 1:vl){
text(1:55,rep(vs-0.1-(k-1)*0.4,55),aa_p[((k-1)*55+1):(55*k)],cex=0.7)
text(1:55,rep(vs-0.2-(k-1)*0.4,55),var_aa_p[((k-1)*55+1):(55*k)],cex=0.7)
text(1:55,rep(vs-0.3-(k-1)*0.4,55),vp[((k-1)*55+1):(55*k)],cex=0.7)
text(c(-2,-2),c(vs-0.1-(k-1)*0.4,vs-0.2-(k-1)*0.4),c("Ref","Var"),cex=0.7)
}
text(c(-2,-2),c(0.9,0.5),c("Ref","Var"))
if(max(length(aa_p),length((var_aa_p)))%%55!=0){
text(rep(max(length(aa_p),length((var_aa_p)))%%55+4,2),c(vs-0.1-(max(vl)-1)*0.4,vs-0.2-(max(vl)-1)*0.4),c(length(a[[1]]),length(var_aa)),cex=0.7)
}else{
text(rep(57.5,2),c(vs-0.1-(max(vl)-1)*0.4,vs-0.2-(max(vl)-1)*0.4),c(length(a[[1]]),length(var_aa)),cex=0.7)
}
library(foreach)
library(readr)
library(gplots)
source("my.heatmap.2.R")
file=dir()[grep("KEGG.xls$",dir())]
dtr=foreach(i=1:length(file)) %do% as.data.frame(read.delim(file[i],"\t",header=T,skip=4,stringsAsFactors=F))[,-12]
dtr=foreach(i=1:length(file)) %do% dtr[[i]][-((nrow(dtr[[i]])-9):nrow(dtr[[i]])),]
dt=foreach(i=1:length(file)) %do% dtr[[i]][dtr[[i]]$Corrected.P.Value<0.1,]
vn=venn(list(FS_vs_Miss=dt[[1]]$X.Term,IF_vs_FS=dt[[2]]$X.Term,IF_vs_Miss=dt[[3]]$X.Term),show.plot = F)
path_list=attr(vn, "intersections")
path_n=foreach(i=1:length(path_list),.combine=c) %do% length(path_list[[i]])
pathway_ovlp=data.frame(group=rep(names(path_list),path_n),pathway=unlist(path_list))
path_list=unique(foreach(i=1:length(dt),.combine=c) %do% dt[[i]]$X.Term)
mt=matrix(0.11,nrow=length(dt),ncol=length(path_list))
rownames(mt)=c("FS_vs_Miss","IF_vs_FS","IF_vs_Miss")
colnames(mt)=path_list
for(i in 1:length(dt)){
rownames(dt[[i]])=dt[[i]]$X.Term
mt[i,rownames(dt[[i]])]=dt[[i]]$Corrected.P.Value
}
mt=apply(mt,2,as.numeric)
rownames(mt)=c("FS_vs_Miss","IF_vs_FS","IF_vs_Miss")
mt_p=mt
mt=matrix(0,nrow=length(dt),ncol=length(path_list))
rownames(mt)=c("FS_vs_Miss","IF_vs_FS","IF_vs_Miss")
colnames(mt)=path_list
for(i in 1:length(dt)){
rownames(dt[[i]])=dt[[i]]$X.Term
mt[i,rownames(dt[[i]])]=1
}
mt=apply(mt,2,as.numeric)
rownames(mt)=c("FS_vs_Miss","IF_vs_FS","IF_vs_Miss")
my.heatmap.2(mt_p,mt, key.xtickfun=function() {
breaks <- parent.frame()$breaks
return(list(
at=parent.frame()$scale01(c(breaks)),
labels=c(as.character(breaks))
))
},col=c(RColorBrewer::brewer.pal(9, name = "RdYlGn")[c(8,7,6,5)],"white"),breaks=seq(0,0.125,len=6),key.xlab = "Corrected P-value",Colv = T,Rowv=F,dendrogram = "column",density.info="none", margins=c(12,9),trace="none",cexRow = 1,cexCol = 1,srtCol=90,colsep = seq(0,14,by=1),rowsep = seq(0,3,by=1),sepcolor = "grey90",sepwidth = c(0.025,0.0125))
library(foreach)
library(scales)
library(ComplexHeatmap)
alpi=read.table("SELECT_result_mutual_Exclusive.xls",sep="\t",header = T,stringsAsFactors = T)
sub_alpi=alpi[alpi$wMI_p.value<0.05,]
Gene=union(sub_alpi$SFE_1,sub_alpi$SFE_2)
smg=read.table("Our_sig_SMG_206.xls",sep="\t")
our_smg=rownames(smg)
#pathway gene
pth_dt=read.table("Pathway_gene_tab.xls",header=T,sep="\t",stringsAsFactors = F)
pth_smg=pth_dt$Gene
gene_anot=foreach(i=1:length(Gene),.combine=rbind) %do% unlist(strsplit(Gene[i],split="_"))
colnames(gene_anot)=c("Gene","alter")
gene_anot=as.data.frame(gene_anot)
gene_anot=merge(gene_anot,pth_dt,by="Gene",all.x = T)
sub_anot=gene_anot
sub_anot=sub_anot[order(sub_anot$pathwaylist),]
sub_anot$Gene=as.character(sub_anot$Gene)
sub_anot$pathwaylist[is.na(sub_anot$pathwaylist)]="SMG"
sub_anot$pathwaylist[sub_anot$alter=="Fusion"]="Fusion"
pth_col=hue_pal(l=40)(13)
names(pth_col)=unique(sub_anot$pathwaylist)
sub_anot$col=pth_col[sub_anot$pathwaylist]
sub_anot$col[is.na(sub_anot$col)]="gray"
Gene_f=paste(sub_anot$Gene,as.character(sub_anot$alter),sep="_")
sub_alpi=alpi[is.element(alpi$SFE_1,Gene_f)&is.element(alpi$SFE_2,Gene_f)&alpi$wMI_p.value<0.05,]
pt=matrix(0,nrow=length(Gene_f),ncol=length(Gene_f))
rownames(pt)=Gene_f
colnames(pt)=Gene_f
#color
for(i in 1:nrow(sub_alpi)){
if(sub_alpi$direction[i]=="CO"){
rl=which(rownames(pt)==sub_alpi$SFE_1[i])
cl=which(rownames(pt)==sub_alpi$SFE_2[i])
if(rl>cl){
pt[rl,cl]=-log10(sub_alpi$wMI_p.value[i]+0.0001)
}else{
pt[cl,rl]=-log10(sub_alpi$wMI_p.value[i]+0.0001)
}
}else{
rl=which(rownames(pt)==sub_alpi$SFE_1[i])
cl=which(rownames(pt)==sub_alpi$SFE_2[i])
if(rl>cl){
pt[cl,rl]=log10(sub_alpi$wMI_p.value[i]+0.0001)
}else{
pt[rl,cl]=log10(sub_alpi$wMI_p.value[i]+0.0001)
}
}
}
rownames(pt)=sub("_Amp","",sub("_Del","",sub("_Fusion","",sub("_Mutation","",rownames(pt)))))
colnames(pt)=sub("_Amp","",sub("_Del","",sub("_Fusion","",sub("_Mutation","",colnames(pt)))))
ppt=matrix("",nrow=length(Gene_f),ncol=length(Gene_f))
rownames(ppt)=Gene_f
colnames(ppt)=Gene_f
#label
for(i in 1:nrow(sub_alpi)){
if(sub_alpi$direction[i]=="CO"){
rl=which(rownames(ppt)==sub_alpi$SFE_1[i])
cl=which(rownames(ppt)==sub_alpi$SFE_2[i])
if(rl>cl){
ppt[rl,cl]=ifelse(sub_alpi$FDR[i]==T,"*","")
}else{
ppt[cl,rl]=ifelse(sub_alpi$FDR[i]==T,"*","")
}
}else{
rl=which(rownames(ppt)==sub_alpi$SFE_1[i])
cl=which(rownames(ppt)==sub_alpi$SFE_2[i])
if(rl>cl){
ppt[cl,rl]=ifelse(sub_alpi$FDR[i]==T,"*","")
}else{
ppt[rl,cl]=ifelse(sub_alpi$FDR[i]==T,"*","")
}
}
}
rownames(ppt)=sub("_Amp","",sub("_Del","",sub("_Fusion","",sub("_Mutation","",rownames(ppt)))))
colnames(ppt)=sub("_Amp","",sub("_Del","",sub("_Fusion","",sub("_Mutation","",colnames(ppt)))))
ppt=ppt[sub_anot$Gene,sub_anot$Gene]
pch_alt=c(16,15,17,18)
names(pch_alt)=c("Mutation","Amp","Del","Fusion")
#library(ComplexHeatmap)
col_anot=sub_anot$pathwaylist
names(col_anot)=sub_anot$Gene
col_anot[is.na(col_anot)]="Fusion"
ha=rowAnnotation(foo=anno_simple(sub_anot$pathwaylist,col=pth_col,pch=pch_alt[as.character(sub_anot$alter)],gp=gpar(col="white"),pt_gp = gpar(col="white")))
ht=Heatmap(pt[sub_anot$Gene,sub_anot$Gene],cluster_columns = F,cluster_rows = F,rect_gp = gpar(col = "white", lwd = 1),col=c(hue_pal(l=40)(9)[2],"white",hue_pal(l=50)(9)[4]),
border = "black",row_names_side = "left",left_annotation = ha,cell_fun = function(j, i, x, y, w, h, col) { # add text to each grid
grid.text(ppt[i, j], x, y,vjust=unit(0.8,"npc"),gp=gpar(fontsize=20,col="white"))
},heatmap_legend_param = list(
title = "", at = c(-4, 4),
labels = c("Mutual-Exclusive","Co-occurence")
))
lgd_list=list(
lgd1=Legend(title = "Pathway", legend_gp = gpar(fill=pth_col),
labels = names(pth_col)),
lgd2=Legend(title = "Alteration", legend_gp = gpar(col="white"),type="points",pch=pch_alt,
labels = names(pch_alt),background = "black")
)
#pdf("CO_ME_threshold_select.pdf",width=13,height =9)
draw(ht, annotation_legend_list = lgd_list)