This is the R Markdown for Extended Dat Fig5, which consists of 5 parts.
library(foreach)
res4=read.table("PromoterCore_summary.xls",sep="\t",header=T,stringsAsFactors=F)
file=dir("hg38_annotation")
file=file[grep("_norep.bed",file)]
file=file[-4]
dt=foreach(i=1:length(file)) %do% read.table(paste("hg38_annotation",file[i],sep="/"),sep="\t",stringsAsFactors=F)
len=c()
for(i in 1:length(dt)){
len[i]=sum(dt[[i]]$V3-dt[[i]]$V2+1)
}
names(len)=sub(".bed","",file)
genom_ref2=len[c(1,2,5,6,8)]/sum(len[c(1,2,5,6,8)])
names(genom_ref2)=c("3UTR","5UTR","IGR","Intron","Promoter")
region=c("Promoter","5UTR","3UTR","Intron","IGR")
ref2=genom_ref2[region]
col_mat=c("red","orange","blue","grey80","grey60")
library(circlize)
#pdf("main_figure1_promoterCore.pdf",height=6,width=6)
par(mar=rep(3,4))
circos.par(cell.padding = c(0, 0, 0, 0),start.degree =0, gap.degree =0,track.margin = c(0, 0))
circos.initialize(1, xlim = cbind(0, 1))
circos.track(ylim=c(0,1),track.height=0.27,bg.border=NA,panel.fun=function(x,y){
m=res4[region,3]
m=m/sum(m)
ml=cumsum(m)*0.94
me=res4[region,4]
me=me/sum(res4[region,3])*0.94
nc = length(m)
for(i in 1:nc) {
circos.rect( c(0,ml[-nc])[i]+0.06/5*i,0.3,ml[i]+0.06/5*i,1,
border =col_mat[i], col = col_mat[i],lwd=0.8)
circos.rect( c(0,ml[-nc])[i]+0.06/5*i,0.3,c(0,ml[-nc])[i]+me[i]+0.06/5*i,1,
border ="purple4", col = "purple4",lwd=0.8)
circos.rect( c(0,ml[-nc])[i]+0.06/5*i,0.3,ml[i]+0.06/5*i,1,
border ="black", col = NA,lwd=0.8)
}
text.loc=c(0,ml[-length(ml)])+ml
#text.loc[5]=text.loc[5]+0.02
circos.text(text.loc/2+(0.06/5*(1:5))-0.01,c(1.6,1.4,rep(1.5,3)),labels = region,niceFacing=T,facing="downward",adj=c(0.5,1),xpd=T,col=col_mat)
})
circos.track(ylim=c(0,1),track.height=0.25,bg.border=NA,panel.fun=function(x,y){
m=res4[region,1]
m=m/sum(m)
ml=cumsum(m)*0.94
me=res4[region,2]
me=me/sum(res4[region,1])*0.94
nc = length(m)
for(i in 1:nc) {
circos.rect( c(0,ml[-nc])[i]+0.06/5*i,0,ml[i]+0.06/5*i,1,
border =col_mat[i], col = col_mat[i],lwd=0.8)
circos.rect( c(0,ml[-nc])[i]+0.06/5*i,0,c(0,ml[-nc])[i]+me[i]+0.06/5*i,1,
border ="purple4", col = "purple4",lwd=0.8)
circos.rect( c(0,ml[-nc])[i]+0.06/5*i,0,ml[i]+0.06/5*i,1,
border ="black", col = NA,lwd=0.8)
}
})
circos.track(ylim=c(0,1),track.height=0.27,bg.border=NA,panel.fun=function(x,y){
m=ref2
ml=cumsum(m)
nc = length(m)
for(i in 1:nc) {
circos.rect( c(0,ml[-nc])[i]+0.06/5,0,ml[i]+0.06/5,0.7,
border ="black", col = col_mat[i],lwd=0.8)
}
})
#dev.off()
new_cluster=read.table("hotspot_promoterCore_cgc_smg_associate_min_max.xls",sep="\t",header=T,stringsAsFactors=F)
color=c(RColorBrewer::brewer.pal(8, name = "Dark2")[1:4],"yellow3","dodgerblue2")
names(color)=c("Enhancer","Promoter","5UTR","3UTR","Intron","IGR")
n_cd=new_cluster
n_cd$col=color[n_cd$Region]
n_cd$Region=factor(n_cd$Region,levels=rev(c("Enhancer","Promoter","5UTR","3UTR","Intron","IGR")))
n_cd=n_cd[order(n_cd$Region),]
#select genes
id=n_cd[n_cd$p.adj<=10^(-50),c(13,4,7,16,17)]
id$s_m=paste(id$p.adj,id$n_mutation_site/id$n_Sample,sep="_")
lg=names(table(id$s_m))[table(id$s_m)!=1]
for(i in 1:length(lg)){
gl=which(id$s_m==lg[i])
id=id[-gl[-length(gl)],]
}
id=id[order(id$p.adj,decreasing = F),1:5]
pos=c(1,3,1,1,1,1,1,1,1,1,
3,1,1,1,4,3,1,3,4,3,
3,3)
loc1=c(rep(0,19),seq(0.03,0.08,len=3))
loc2=c(rep(0,19),rep(-3,3))
#pdf("Funseq2_figure1_promoterCore.pdf",height=12,width=12)
plot(-log10(n_cd$p.adj),n_cd$n_mutation_site/n_cd$n_Sample,col=n_cd$col,pch=16,xlab="-log10(P value)",ylab="number of mutations per sample")
legend("topright",names(color),pch=16,col=color,bty="n")
text(-log10(id$p.adj)+loc2,id$n_mutation_site/id$n_Sample+loc1,id$Gene,cex=0.5,col=color[as.character(id$Region)],pos=pos)
segments(59,1,56,1.05)
#dev.off()
mt=dir("loctest_globtest")[grep("loctest_globtest.0.05.txt",dir("loctest_globtest"))][c(5,6,1,2,7,3)]
path2="figure2_associate_cgc_smg"
file2=dir(path2)[c(5,6,1,2,7,3)]
#mt=dir()[grep("loctest_globtest.0.5$",dir())]
mt_dt=foreach(i=1:length(mt)) %do% read.table(paste("loctest_globtest",mt[i],sep="/"),sep="\t",header=T,stringsAsFactors=F)
mt_id=foreach(i=1:length(file2)) %do% read.table(paste(path2,file2[i],sep="/"),sep="\t",header=T,stringsAsFactors=F)
for(i in c(1:5)){
mt_id[[i]]=mt_id[[i]]
mt_id[[i]]$region=rep(sub("_local_global_associate.xls","",file2)[i],nrow(mt_id[[i]]))
}
mt_id[[6]]=mt_id[[6]][,-c(20)]
mt_id[[6]]$region=rep(sub("_local_global_associate.xls","",file2)[6],nrow(mt_id[[6]]))
mt_id=foreach(i=1:length(mt_dt)) %do% mt_id[[i]][mt_id[[i]]$glob_p_value<0.1|mt_id[[i]]$loc_p_value<0.1,]
id=as.data.frame(do.call(rbind,mt_id)[,c(15,17,19,28,32)])
id$loc_glob=paste(id$loc_p_value,id$glob_p_value,sep="_")
id=id[order(id$glob_p_value,decreasing = F),1:5]
#color=c("blue","red","deeppink","gold","green","orange","purple")
color=c(RColorBrewer::brewer.pal(8, name = "Dark2")[1:4],"yellow3","dodgerblue2")
names(color)=c("enhancer","promoterCore","5utr","3utr","intron","IGR2")
#pdf("figure2_associate_cgc_smg/Funseq2_figure2_promotercore_201909020.pdf",height=9,width=9)
plot(-log10(mt_dt[[1]]$glob_p_value),-log10(mt_dt[[1]]$loc_p_value),xlim=c(0,4),ylim=c(0,4),axes =F,pch=16,col=rev(color)[1],xlab="Global model(P value)",ylab="Regional model(P value)")
for(i in 2:6){
points(-log10(mt_dt[[i]]$glob_p_value),-log10(mt_dt[[i]]$loc_p_value),col=rev(color)[i],pch=16)
}
points(-log10(id$glob_p_value)[1],-log10(id$loc_p_value)[1],col="white",pch=16)
axis(1,at=0:5,labels = 10^(-(0:5)))
axis(2,at=0:4,labels = 10^(-(0:4)))
box()
legend("bottomright",legend=c("Enhancer","PromoterCore","5UTR","3UTR","Intron","IGR"),col=color,pch=16,bty="n")
text(-log10(id$glob_p_value)[-1],-log10(id$loc_p_value)[-1],id$Gene[-1],pos=c(rep(1,8)),cex=0.6,col=color[id$region[-1]],xpd=T)