This is the R Markdown for Figure 1, which consists of 2 parts.
sp_file=read.table("samples_208.xls",header = T,sep="\t",stringsAsFactors = F)$Samples_208
library(maftools)
#cnv
path="somatic_cnv"
library(foreach)
file=dir(path)[grep("WGS.final.mpileup.gz_CNVs.filter.p.value.txt",dir(path))]
size_cnv=c()
for(i in 1:length(file)){
dt=read.table(paste(path,file[i],sep="/"),sep="\t",stringsAsFactors = F,header = T)
dt=dt[dt$status=="gain"|dt$status=="loss",1:5]
dt$Sample=rep(sub("_WGS.final.mpileup.gz_CNVs.filter.p.value.txt","",file[i]),nrow(dt))
size_cnv[i]=sum(as.numeric(dt$end)-as.numeric(dt$start)+1)
if(i==1){
dt_all=dt
}else{
dt_all=rbind(dt_all,dt)
}
}
cnv=t(table(dt_all[,5:6]))
sub_cnv=cnv[is.element(rownames(cnv),sp_file),]
sub_cnv=as.data.frame(cbind(sub_cnv,Sample=rownames(sub_cnv)))
#fs
soap=read.table("SOAPfuse.filter.summary_208.xls",sep="\t",stringsAsFactors = F,header = T)
sp=gsub("_WTS","",unlist(strsplit(soap$sample,split=",")))
fs_tab=table(sp)
sub_fs=fs_tab[sp_file]
names(sub_fs)=sp_file
sub_fs[is.na(sub_fs)]=0
#somatic mutation
nonsys=c("Missense_Mutation","Nonsense_Mutation","Nonstop_Mutation","Splice_Site","Frame_Shift_Del","Frame_Shift_Ins","In_Frame_Ins","In_Frame_Del")
coding=c("Frame_Shift_Del","Frame_Shift_Ins","In_Frame_Del","In_Frame_Ins","Missense_Mutation","Nonsense_Mutation","Nonstop_Mutation","Silent","Splice_Site")
noncoding=c("Intron","3'UTR","5'UTR","RNA","IGR","5'Flank","3'Flank")
mt=read.maf("Somatic_mutation.filter.208.maf",removeDuplicatedVariants = F)
sub_mt=subsetMaf(mt,includeSyn = T,tsb = NULL, genes = NULL,fields = NULL, query = NULL, mafObj = FALSE, isTCGA = FALSE)
cd_mt=sub_mt[is.element(sub_mt$Variant_Classification,coding),]
ncd_mt=sub_mt[is.element(sub_mt$Variant_Classification,noncoding),]
cd_tab=table(cd_mt[,c("Tumor_Sample_Barcode","Variant_Type")])
ncd_tab=table(ncd_mt[,c("Tumor_Sample_Barcode","Variant_Type")])
sub_cd=data.frame(Sample=sub("_WGS","",rownames(cd_tab)),cd_SNP=cd_tab[,3],cd_Indel=rowSums(cd_tab[,1:2]))
sub_ncd=data.frame(Sample=sub("_WGS","",rownames(ncd_tab)),ncd_SNP=ncd_tab[,3],ncd_Indel=rowSums(ncd_tab[,1:2]))
#sv
sv_dt=as.data.frame(readxl::read_excel("sv.xlsx",col_names = F))
colnames(sv_dt)=c("Sample","SV_type")
sv_dt$Sample=sub("_WGS","",sv_dt$Sample)
sv_dt$SV_type[grep("del",sv_dt$SV_type)]="Deletion"
sv_dt$SV_type[grep("invers",sv_dt$SV_type)]="Inversion"
sv_dt$SV_type[grep("ins",sv_dt$SV_type)]="Insertion"
sv_dt$SV_type[sv_dt$SV_type=="transl_inter"]="Translocation:inter-chromosomal"
sv_tab=table(sv_dt[,c("Sample","SV_type")])
sv_other=sv_tab[1:2,]
sv_other[sv_other!=0]=0
rownames(sv_other)=setdiff(sp_file,rownames(sv_tab))
sub_sv=as.data.frame(rbind(sv_tab,sv_other))
sub_sv$Sample=rownames(sub_sv)
#epi
epi=as.data.frame(read.delim("PC_WGBS_189pairs_DML_count_epi_burden.xls"))
colnames(epi)[1]="Sample"
#somatic signatuer
suppressMessages(library(plyr))
suppressMessages(library(dplyr))
suppressMessages(library(data.table))
parse_signatures = function(filepath) {
suppressWarnings(fread(filepath)) %>%
tbl_df() %>%
rename(Tumor_Sample_Barcode = `Sample Name`) %>%
select(-`Number of Mutations`) %>%
select(Tumor_Sample_Barcode, everything())
}
sign=as.data.frame(parse_signatures("Somatic_mutation.filter.trinuc.decomposed.208.maf"))
sign_main=sign[,c(1,2,4,6,9,10,17)]
sign_main$Others=1-rowSums(sign_main[,2:7])
colnames(sign_main)[1]="Sample"
sign_main$Sample=sub("_WGS","",sign_main$Sample)
#
mut_dat=merge(sub_cd,sub_ncd,by="Sample")
mut_dat=merge(mut_dat,sub_cnv,by="Sample")
mut_dat=merge(mut_dat,sub_sv,by="Sample")
mut_dat$Fusion=sub_fs[as.character(mut_dat$Sample)]
mut_dat=merge(mut_dat,epi[,1:3],by="Sample",all.x = T)
mut_dat[is.na(mut_dat)]=0
mut_dat=merge(mut_dat,sign_main,by="Sample")
#
cl=as.data.frame(readxl::read_excel("Supplementary+Table+1+Clinical+information+20190710(1).xlsx"))
rownames(cl)=paste("T",cl$`Sequencing ID`,sep="")
mt_208=cl[sp_file,]
mt_208$`Sequencing ID`=paste("T",mt_208$`Sequencing ID`,sep="")
colnames(mt_208)[3]="Sample"
#define risk_type,localised_locally
mt_208$PSA[mt_208$PSA==">100"]=101
mt_208$PSA=as.numeric(mt_208$PSA)
mt_208$`Pathology T`[mt_208$`Pathology T`=="pT2"]="pT2a"
mt_208$GS=as.numeric(mt_208$GS)
mt_208$`Pathology N`[mt_208$`Pathology N`=="pn0"]="pN0"
mt_208$`Pathology N`[mt_208$`Pathology N`=="pNX"]="pNx"
mt_208$risk_type=""
mt_208$risk_type[mt_208$PSA<10&mt_208$GS<7&mt_208$`Pathology T`=="pT2a"]="Low-risk"
mt_208$risk_type[(mt_208$PSA>=10&mt_208$PSA<=20)|mt_208$GS==7|mt_208$`Pathology T`=="pT2b"]="Intermediate-risk"
mt_208$risk_type[mt_208$PSA>20|mt_208$GS>7|mt_208$`Pathology T`=="pT2c"]="High-risk"
mt_208$localised_locally=""
mt_208$localised_locally[mt_208$risk_type!=""]="Localised"
mt_208$risk_type[mt_208$risk_type==""]="High-risk"
mt_208$localised_locally[mt_208$risk_type=="High-risk"&mt_208$localised_locally!="Localised"]="Locally advanced"
mt_208$localised_locally[mt_208$`Clinical M`=="M1a"|mt_208$`Clinical M`=="M1b"|mt_208$`Clinical M`=="M1c"]="Metastasis"
fig1_data=merge(mt_208,mut_dat,by="Sample")
write.table(fig1_data,"Mutation_summary_Arv.xls",sep="\t",row.names=F,quote=F)
suppressMessages(library(plyr))
suppressMessages(library(dplyr))
suppressMessages(library(data.table))
suppressMessages(library(ggplot2))
suppressMessages(library(grid))
suppressMessages(library(cowplot))
suppressMessages(library(stringr))
suppressMessages(library(RColorBrewer))
suppressMessages(library(Cairo))
'%nin%' = Negate('%in%')
#define functions
theme_bwmin = function(base_size = 12, base_family = 'Helvetica')
{
theme_bw(base_size = base_size, base_family = base_family) %+replace%
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
strip.background = element_blank(),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.1),
legend.key = element_rect(color = 'white'),
legend.key.size = unit(.25, 'cm')
)
}
get.clinical.scale = function(cl_col,cl_lab) {
# Set1 colors
# use Perou's intrinsic subtype colors instead
colors = cl_col
color.names = cl_lab
names(colors) = color.names
clinical.color.scale = scale_fill_manual(name="clinic feature", values=colors)
return(clinical.color.scale)
}
sign_map = c(
'1'='Age','3'='BRCA1/2','9'='IGHV_hypermutation')
### Signature colors // grey for unknowns
grey_pal = colorRampPalette(brewer.pal(9, 'Greys'))(16) # Greys except for hand-picked signatures
sign_cols = c(
"#e41a1c","#4daf4a",grey_pal[2],"#a65628","#ff7f00","#984ea3",grey_pal[9])
mutation_summary=function(my_mt,cl_fig,cl_col,cl_lab,sample_order,rna_sp,epi_sp)
{
cl_melt=data.table::melt(as.matrix(as.data.frame(cl_fig)))
cl_melt$Var1=factor(cl_melt$Var1, levels=sample_order, ordered = T)
cl_plot=ggplot(cl_melt, aes(Var1,Var2)) + geom_tile(aes(fill = value),colour = "white",stat="identity")+
get.clinical.scale(cl_col,cl_lab)+
guides(fill=guide_legend(nrow=7,byrow = F,keywidth = .7, keyheight = .7,label.theme = element_text(size=12,family='Helvetica',angle=0)))+
theme_bwmin()+
theme(axis.ticks = element_blank(),axis.line = element_blank(),axis.text.x = element_blank(),
axis.title = element_blank(),legend.position = 'right')
#cnv
cnv_dt=my_mt[,c("loss","gain")]
cnv_dt$Sample=rownames(cnv_dt)
cnv_melt=melt(cnv_dt)
cnv_melt$Sample=factor(cnv_melt$Sample, levels=sample_order, ordered = T)
cnv_bar=ggplot(cnv_melt,aes(x=Sample,y=value))+
geom_bar(stat ='identity',aes(fill=factor(variable)))+labs(y="CNA",x="")+
guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme=element_text(size=12,family='Helvetica',angle=0)))+
scale_fill_manual(values =rev(c("#ca0020","#0571b0")))+theme_bwmin() +
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank(),axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))
#sv
sv_dt=my_mt[,rev(c("Deletion","Insertion","Inversion","tandem_dup","Translocation:inter-chromosomal"))]
sv_dt$Sample=rownames(sv_dt)
sv_melt=melt(sv_dt)
sv_melt$Sample=factor(sv_melt$Sample, levels=sample_order, ordered = T)
sv_melt$label=""
p1=ggplot(sv_melt,aes(x=Sample,y=value))+
geom_bar(stat = 'identity',aes(fill=factor(variable)))+labs(y="SV",x="")+theme_bwmin() +
coord_cartesian(ylim=c(0,700),expand = F)+guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme =element_text(size=12,family='Helvetica',angle=0)))+
scale_fill_manual(values = rev(c("#2166ac","#92c5de","#fddbc7","#d6604d","#b2182b")))+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank(),axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))+theme(plot.margin = unit(c(1,1,1,2.5),"mm"))+
geom_text(aes(label=label),position = position_stack(1))
p2=ggplot(sv_melt,aes(x=Sample,y=value))+
geom_bar(stat = 'identity',aes(fill=factor(variable)))+
labs(y="",x="")+coord_cartesian(ylim=c(1800,2200),expand = F)+theme_bwmin() +
guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme = element_text(size=12,family='Helvetica',angle=0))) +
scale_fill_manual(values = rev(c("#2166ac","#92c5de","#fddbc7","#d6604d","#b2182b"))) +
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank(),
axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))+theme(plot.margin = unit(c(1,1,0,0),"mm"))+
scale_y_continuous(labels = c(1800,2000,2200),breaks = c(1800,2000,2200))
#fusion
fs_dt=my_mt[,"Fusion"]
fs_dt[is.na(fs_dt)]=0
fs_dt=data.frame(Sample=rownames(my_mt),Fusion=fs_dt)
fs_melt=melt(fs_dt)
fs_melt$Sample=factor(fs_melt$Sample, levels=sample_order, ordered = T)
fs_melt$label=""
fs_melt$label[!is.element(as.character(fs_melt$Sample),rna_sp$V1)]="*"
fs_bar=ggplot(fs_melt,aes(x=Sample,y=value))+
geom_bar(stat = 'identity',aes(fill=factor(variable)))+
labs(y="Fusion",x="")+theme_bwmin() +
guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme = element_text(size=12,family='Helvetica',angle=0))) +
scale_fill_manual(values = "violetred") +
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank(),axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))+geom_text(aes(label=label),position = position_stack(1))
#coding mutation
cd_dt=my_mt[,rev(c("cd_SNP","cd_Indel"))]
cd_dt$Sample=rownames(cd_dt)
cd_melt=melt(cd_dt)
cd_melt$Sample=factor(cd_melt$Sample, levels=sample_order, ordered = T)
cd_bar=ggplot(cd_melt,aes(x=Sample,y=value))+
geom_bar(stat = 'identity',aes(fill=factor(variable)))+
labs(y="Coding",x="")+theme_bwmin() +
guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme = element_text(size=12,family='Helvetica',angle=0))) +
scale_fill_manual(values = c("#ca0020","#0571b0"),labels=c("Indel","SNP")) +
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank(),axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))
#non-coding mutation
ncd_dt=my_mt[,rev(c("ncd_SNP","ncd_Indel"))]
ncd_dt$Sample=rownames(ncd_dt)
ncd_melt=melt(ncd_dt)
ncd_melt$Sample=factor(ncd_melt$Sample, levels=sample_order, ordered = T)
ncd_bar=ggplot(ncd_melt,aes(x=Sample,y=value))+
geom_bar(stat = 'identity',aes(fill=factor(variable)))+
labs(y="Noncoding",x="")+theme_bwmin() +
guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme = element_text(size=12,family='Helvetica',angle=0))) +
scale_fill_manual(values = c("#d6604d","#4393c3"),labels=rev(c("SNP","Indel"))) +
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank(),axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))
#epimutaion
epi_dt=my_mt[,c("hypoDMP","hyperDMP")]
epi_dt$Sample=rownames(epi_dt)
epi_melt=melt(epi_dt)
epi_melt$Sample=factor(epi_melt$Sample, levels=sample_order, ordered = T)
epi_melt$label=""
epi_melt$label[!is.element(as.character(epi_melt$Sample),epi_sp)]="*"
epi_bar=ggplot(epi_melt,aes(x=Sample,y=value))+
geom_bar(stat = 'identity',aes(fill=factor(variable)))+
labs(y="Epimutation",x="")+theme_bwmin() +
guides(fill = guide_legend(title="",keywidth = .7, keyheight = .7,label.theme = element_text(size=12,family='Helvetica',angle=0))) +
scale_fill_manual(values = c("#0072B2", "#D55E00"),labels=c("Hypomethylation","Hypermethylation")) +
theme(axis.ticks.x = element_blank(),axis.line.x = element_blank(),axis.text.x = element_blank(),
axis.title.x = element_blank(),legend.position = 'right')+
geom_hline(aes(yintercept=0))+geom_text(aes(label=label),position = position_stack(1))
sign_dt=my_mt[,c("Signature.1","Signature.3","Signature.5","Signature.8","Signature.9","Signature.16","Others")]
sign_dt$Sample=rownames(sign_dt)
sign_melt = melt(sign_dt, id.vars = c('Sample'), variable.name = 'Signature', value.name = 'Fraction') %>%
mutate(Signature = str_replace(Signature, 'Signature.', ''))
sign_melt$Signature = factor(sign_melt$Signature, levels = c(1,3,5,8,9,16,"Others"), ordered = T)
sign_melt$Signature = revalue(sign_melt$Signature, sign_map)
sign_melt$Sample=factor(sign_melt$Sample, levels=sample_order, ordered = T)
sample_sig = ggplot(sign_melt, aes(x = Sample, fill =Fraction)) +
geom_bar(aes(weight = Fraction, fill = Signature)) +
guides(fill = guide_legend(keywidth = .7, keyheight = .7, ncol= 3, title = 'Signature',byrow = F)) +
labs(y = 'Fraction of mutations', x = '') +
scale_fill_manual(values = sign_cols) +
scale_y_continuous(expand = c(0,0)) +
geom_hline(yintercept = c(0,1), size = .25) +
theme_bwmin() +
theme(axis.text.y = element_text(size = 13),
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 0, size = 6),
axis.ticks.x = element_blank(),
plot.margin = unit(c(0,.25,0,.25), 'in'),
legend.position = 'right')
#merge
outplot = plot_grid(cl_plot, cnv_bar, p2,p1,fs_bar,cd_bar,ncd_bar,epi_bar,sample_sig,ncol = 1, align = 'v', rel_heights = c(4.3,2,3/4,5/4,2,2,2,2,3))
print(outplot)
}
#mutation data
epi=as.data.frame(read.delim("PC_WGBS_189pairs_DML_count_epi_burden.xls"))
colnames(epi)[1]="Sample"
epi_sp=epi$Sample
rna_sp=read.table("RNA_sample.xls",sep="\t",stringsAsFactors=F)
mt=read.table("Mutation_summary_Arv.xls",sep="\t",header=T,stringsAsFactors=F,check.name=F,row.names = 1)
cd_dt=mt[,rev(c("cd_SNP","cd_Indel"))]
sample_order=rownames(cd_dt)[order(rowSums(cd_dt),decreasing = T)]
mt=mt[sample_order,]
mt$`Pretreatment drug`[is.na(mt$`Pretreatment drug`)]="NO"
#clinical data
cl_dt=mt[,c("Age","Pathology N","Pathology T","GS","PSA","Bone Metastasis","Clinical M","Pretreatment drug")]
cl_dt$Age=ifelse(cl_dt$Age<=65,"50~65",ifelse(cl_dt$Age>65&cl_dt$Age<=70,"66~70",ifelse(cl_dt$Age>70&cl_dt$Age<=75,"71~75","76~")))
cl_dt$`Clinical M`=paste("c",cl_dt$`Clinical M`,sep="")
cl_dt$PSA=ifelse(cl_dt$PSA<=4,"<4",ifelse(cl_dt$PSA>4&cl_dt$PSA<=20,"4~20",">20"))
cl_dt$PSA=paste("PSA",cl_dt$PSA,sep=":")
cl_dt$GS=paste("GS",cl_dt$GS,sep=":")
cl_dt$GS[cl_dt$GS=="GS:NA"]=NA
cl_dt$Age=paste("Age",cl_dt$Age,sep=":")
cl_dt$Age[cl_dt$Age=="Age:NA"]=NA
colnames(cl_dt)[c(2,3,7)]=c("pN","pT","cM")
cl_dt$`Bone Metastasis`=paste("BM",cl_dt$`Bone Metastasis`,sep=":")
cl_dt$`Pretreatment drug`=paste("Pre-Treat",cl_dt$`Pretreatment drug`,sep=":")
cl_dt$`Pretreatment drug`[cl_dt$`Pretreatment drug`=="Pre-Treat:NA"]=NA
cl_dt=cl_dt[,c(1,6,7,2,3,4,5,8)]
#set color and clinical features
cl_col =c(RColorBrewer::brewer.pal(9, name = "GnBu")[c(6:9)],c("#377eb8","#4daf4a","#984ea3","#ff7f00","#ffff33","wheat2","#a65628"),c("gray43","white"),c(RColorBrewer::brewer.pal(9, name = "BuPu")[c(6,8)],"#9B808F50"),c("#f1eef6","#d7b5d8","#df65b0","#ce1256","#9B808F50"),c("#E0F3DB",RColorBrewer::brewer.pal(9, name = "YlGnBu")),c("#bcbddc","#756bb1","#9B808F50"),RColorBrewer::brewer.pal(9, name = "PiYG")[3:1],"#9B808F50",c("#f1eef6","#d0d1e6","#a6bddb","#74a9cf","#2b8cbe","#045a8d"),c("#fee5d9","#fcae91","#fd6a4a","#de2d26","#a50f15"),c("#edf8b1","#7fcdbb","#2c7fb8"),c("#009E73","#D55E00","#56B4E9"),RColorBrewer::brewer.pal(8, name = "PRGn")[3:1],RColorBrewer::brewer.pal(11, name = "Spectral")[4:9],"#9B808F50")
cl_lab=c("Age:50~65","Age:66~70","Age:71~75","Age:76~","Pre-Treat:bicalutamide","Pre-Treat:bicalutamide+Finasteride","Pre-Treat:Finasteride","Pre-Treat:Flutamide","Pre-Treat:Leuprorelin+bicalutamide","Pre-Treat:NO","Pre-Treat:Zoladex+bicalutamide","BM:NO","BM:YES","cN0","cN1","cNx","cM0","cM1a","cM1b","cM1c","cMx","cT0","cT1a","cT1b","cT1c","cT2a","cT2b","cT2c","cT3a","cT3b","cT3c","pN0","pN1","pNx","pM0","pM1a","pM1b","pMx","pT2a","pT2b","pT2c","pT3a","pT3b","pT4","GS:10","GS:6","GS:7","GS:8","GS:9","PSA:<4","PSA:4~20","PSA:>20","CNV:Some_cnv","CNV:Quiet","CNV:More_cnv","RNA1","RNA2","RNA3","DMR1","DMR2","DMR3","DMR4","DMR5","DMR6","NA")
#no_drug primary samples
sample_order=rownames(cd_dt)[order(rowSums(cd_dt),decreasing = T)]
samp=rownames(cl_dt)[(cl_dt$`Pretreatment drug`=="Pre-Treat:NO"|is.na(cl_dt$`Pretreatment drug`))&(cl_dt$cM=="cM0"|cl_dt$cM=="cMx")]
my_mt=mt[samp,]
cl_fig=cl_dt[samp,]
#CairoPDF(file = "Clinical_Mutation_part1_177_Arva.pdf", w = 30, h = 15)
mutation_summary(my_mt,cl_fig,cl_col,cl_lab,sample_order,rna_sp,epi_sp)