This is the R Markdown for Supplementary Table 15, which consists of 5 parts.
TCGA_CP & SU2C:
source("Pathway_Primary_Metastatic_data.R")
Pathway:
# Gene list used in the pathway analysis
library(foreach)
# Build the pathway gene table and annotate each gene with its SMG / CGC
# membership taken from the CGC_SMG gene list.
cgc_smg <- read.table(
  "CGC_SMG_gene.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
cgc_smg$smg_source[is.na(cgc_smg$smg_source)] <- ""
rownames(cgc_smg) <- cgc_smg$Gene

# Only the first three columns of the curated workbook are used.
pathway_xlsx <- readxl::read_excel("Pathway+gene+oncogene+TSG+curated2.xlsx")
dt <- as.data.frame(pathway_xlsx[, 1:3])
colnames(dt)[1] <- "Gene"
rownames(dt) <- dt$Gene
dt$SMG_source <- ""
dt$SMG <- ""

# Copy columns 2:3 of the CGC/SMG table for genes present in both tables.
annotated <- rownames(dt) %in% rownames(cgc_smg)
dt[annotated, c("SMG_source", "SMG")] <- cgc_smg[rownames(dt)[annotated], 2:3]

# Split the combined label into separate columns: strip the "cgc" part to
# leave the SMG flag, and strip the "smg" part to leave the CGC flag.
dt$CGC <- dt$SMG
dt$SMG <- sub("cgc", "", sub("cgc;", "", dt$SMG))
dt$CGC <- sub("smg", "", sub(";smg", "", dt$CGC))

write.table(
  dt, "Pathway_gene_tab.xls",
  row.names = FALSE, sep = "\t", quote = FALSE
)
# Calculate CNV, fusion, mutation, SV, non-coding mutation and methylation
# frequencies. This first part tabulates somatic mutations.
library(maftools)
maf <- read.maf(
  "Somatic_mutation.filter.208.maf",
  removeDuplicatedVariants = FALSE
)
mf <- subsetMaf(
  maf,
  includeSyn = FALSE, tsb = NULL, genes = NULL, fields = NULL,
  query = NULL, mafObj = FALSE, isTCGA = FALSE
)

# Filter out the two outlier samples.
mf$Variant_Classification <- as.character(mf$Variant_Classification)
mf <- mf[!(mf$Tumor_Sample_Barcode %in% c("T502_WGS", "T13_WGS")), ]

# Somatic mutation frequency: cross-tabulate columns 1 and 16 of the MAF
# table (presumably gene symbol and tumor sample barcode -- confirm against
# the MAF column order).
mf_tab <- table(mf[, c(1, 16)])
write.table(
  mf_tab, "Somatic_mutation_freq_per_gene.xls",
  sep = "\t", quote = FALSE
)
# Fusion: gene-by-sample count table over the retained RNA samples.
sp_file <- read.table(
  "RNA_sample.xls",
  header = FALSE, sep = "\t", stringsAsFactors = FALSE
)
sp <- sp_file$V1
sp <- sp[!(sp %in% c("T37", "T274"))]

fs_ct <- read.table(
  "Fusion_sampleID_genepair_validate_134_Arv.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
colnames(fs_ct)[1] <- "Gene_pair"
fs <- fs_ct

# Gene pairs are joined by "--". In the flattened split, odd positions hold
# the first partner and even positions the second.
fs_gene <- unlist(strsplit(fs$Gene_pair, split = "--"))
first_partner <- seq(1, length(fs_gene), by = 2)
second_partner <- seq(2, length(fs_gene), by = 2)
fs$Gene1 <- fs_gene[first_partner]
fs$Gene2 <- fs_gene[second_partner]

# One row per (partner gene, sample); both partners count for the sample.
fs_sp <- data.frame(
  Gene = c(fs$Gene1, fs$Gene2),
  Sample = rep(fs$Sample, times = 2)
)
fs_sp$Sample <- factor(fs_sp$Sample, levels = sp)
fs_tab <- table(fs_sp)[, sp]
write.table(fs_tab, "Fusion_freq_per_gene.xls", sep = "\t", quote = FALSE)
# SV (inversion, insertion, translocation, deletion, tandem duplication):
# build a binary gene-by-sample table of structural-variant hits and write it
# to SV_sample_summary.xls.
sp_file <- read.table(
  "samples_208.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
sp <- sp_file$Samples_208
path <- "subtype_sv/"
# file1: SV types annotated with a gene on each breakpoint (Pair1/Pair2).
file1 <- c(
  "Inversion_sv_annotation_208.xls",
  "Insertion_sv_annotation_208.xls",
  "Translocation_inter-chromosomal_sv_annotation_208.xls"
)
# file2: SV types annotated with a comma-separated gene list.
file2 <- c(
  "Deletion_sv_annotation_208.xls",
  "tandem_duplication_sv_annotation_208.xls"
)

# Read one SV annotation table from the subtype directory.
read_sv_annotation <- function(file_name) {
  read.table(
    paste(path, file_name, sep = "/"),
    sep = "\t", header = TRUE, stringsAsFactors = FALSE
  )
}
dt1 <- lapply(file1, read_sv_annotation)
dt2 <- lapply(file2, read_sv_annotation)

DT1 <- do.call(rbind, dt1)
DT1_gene <- data.frame(
  Sample = c(DT1$Sample, DT1$Sample),
  Gene = c(DT1$Pair1_Gene, DT1$Pair2_Gene)
)

DT2 <- do.call(rbind, dt2)
# Columns 1 and 9 are addressed below as `Sample` and `Gene`.
DT2 <- unique(DT2[, c(1, 9)])
DT2 <- DT2[DT2$Gene != "", ]
# Expand the comma-separated gene lists to one row per (sample, gene).
# Vectorised so that an empty DT2 yields an empty table instead of failing
# on a 1:0 loop.
sv_genes <- strsplit(DT2$Gene, split = ",")
DT2_gene <- data.frame(
  Sample = rep(DT2$Sample, times = lengths(sv_genes)),
  Gene = as.character(unlist(sv_genes))
)

DT <- rbind(unique(DT1_gene), unique(DT2_gene))
DT$Sample <- factor(DT$Sample, levels = sp)
dt_tab <- table(DT)
# Remove the column for the empty gene name by name. Dropping "the first
# column" would discard a real gene whenever no empty name is present.
dt_tab <- dt_tab[, colnames(dt_tab) != "", drop = FALSE]
dt_tab[dt_tab != 0] <- 1
write.table(t(dt_tab), "SV_sample_summary.xls", sep = "\t", quote = FALSE)
# Non-coding somatic mutations: binary gene-by-sample table built from the
# hotspot file, written to Noncoding_summary.xls.
sp_file <- read.table(
  "samples_208.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
sp <- sp_file$Samples_208
sp <- sp[!(sp %in% c("T37", "T274", "T502", "T13"))]
ncd <- read.table(
  "hotspot_promoterCore_cgc_smg_associate_min_max_oncokb_Arv.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# Columns 17 and 5 are addressed below as `Gene` and `Sample`.
sub_ncd <- unique(ncd[, c(17, 5)])
gene <- unique(sub_ncd$Gene)
ncd_tab <- matrix(0, nrow = length(gene), ncol = length(sp))
rownames(ncd_tab) <- gene
colnames(ncd_tab) <- sp
# seq_along() keeps the loop empty when there are no genes (1:0 would run).
for (i in seq_along(gene)) {
  # `Sample` holds a comma-separated list of sample IDs per hotspot.
  smp <- unlist(strsplit(sub_ncd$Sample[sub_ncd$Gene == gene[i]], split = ","))
  # Restrict to the retained samples: a hotspot that lists one of the samples
  # excluded above would otherwise abort with "subscript out of bounds".
  ncd_tab[i, intersect(smp, sp)] <- 1
}
write.table(ncd_tab, "Noncoding_summary.xls", sep = "\t", quote = FALSE)
# Methylation frequency: turn the hyper/hypo DMR tables into binary
# gene-by-sample tables (one row per gene) and write them out.
hyper <- read.table(
  "hyper_gene_dmr.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
hypo <- read.table(
  "hypo_gene_dmr.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)

# --- hyper-methylated DMRs ---
# Recode the cell labels: "hyper" -> 1, "." -> 0, then drop the first column.
hyper[hyper == "hyper"] <- 1
hyper[hyper == "."] <- 0
hyper <- hyper[, -1]
# Genes that occur on more than one row are collapsed into their first row.
hyper_gn <- names(table(hyper$Gene))[table(hyper$Gene) != 1]
# seq_along(): when no gene is duplicated the loop must not run at all.
# 1:length() would iterate over c(1, 0) and break the table.
for (i in seq_along(hyper_gn)) {
  gl <- which(hyper$Gene == hyper_gn[i])
  # Sum the duplicated rows column-wise into the first occurrence ...
  hyper[gl[1], -1] <- colSums(apply(hyper[gl, -1], 2, as.numeric))
  # ... and remove the remaining occurrences.
  hyper <- hyper[-gl[-1], ]
}
rownames(hyper) <- hyper$Gene
hyper <- hyper[, -1]
# Any non-zero sum counts as "altered".
hyper[hyper != 0] <- 1

# --- hypo-methylated DMRs (same procedure) ---
hypo[hypo == "."] <- 0
hypo[hypo == "hypo"] <- 1
hypo <- hypo[, -1]
hypo_gn <- names(table(hypo$Gene))[table(hypo$Gene) != 1]
for (i in seq_along(hypo_gn)) {
  gl <- which(hypo$Gene == hypo_gn[i])
  hypo[gl[1], -1] <- colSums(apply(hypo[gl, -1], 2, as.numeric))
  hypo <- hypo[-gl[-1], ]
}
rownames(hypo) <- hypo$Gene
hypo <- hypo[, -1]
hypo[hypo != 0] <- 1

write.table(hyper, "DMR_hyper_summary.xls", sep = "\t", quote = FALSE)
write.table(hypo, "DMR_hypo_summary.xls", sep = "\t", quote = FALSE)
# Calculate per-gene alteration frequencies for every alteration type and
# write one "<type>_sample_freq.xls" table per type.

# Summarise a binary gene-by-sample table and write it to
# "<label>_sample_freq.xls".
#
# alter   : data frame, genes in rows (row names) and samples in columns;
#           a cell equal to 1 marks an altered sample.
# label   : file-name prefix for the alteration type (e.g. "Mut").
# samples : sample IDs that form the denominator of the frequency.
#
# The written columns are Gene, Alter(<n>)_Sample (comma-separated altered
# samples), Alter(<n>)_N and Alter(<n>)_Freq, with <n> = length(samples).
# Returns the written data frame invisibly.
write_alter_freq <- function(alter, label, samples) {
  n_samples <- length(samples)
  # seq_len() keeps this safe when no pathway gene is present in `alter`.
  hits <- lapply(seq_len(nrow(alter)), function(j) {
    colnames(alter)[which(alter[j, ] == 1)]
  })
  out <- data.frame(
    Gene = rownames(alter),
    Sample = vapply(hits, paste, character(1), collapse = ","),
    N = lengths(hits),
    Freq = lengths(hits) / n_samples,
    stringsAsFactors = FALSE
  )
  prefix <- paste0("Alter(", n_samples, ")")
  colnames(out) <- c("Gene", paste0(prefix, c("_Sample", "_N", "_Freq")))
  write.table(
    out, paste0(label, "_sample_freq.xls"),
    sep = "\t", row.names = FALSE, quote = FALSE
  )
  invisible(out)
}

# Load the per-type summary tables produced by the earlier steps.
pth_dt <- read.table(
  "Pathway_gene_tab.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
mut <- read.table(
  "Somatic_mutation_freq_per_gene.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, row.names = 1
)
colnames(mut) <- sub("_WGS", "", colnames(mut))
mut[mut != 0] <- 1
CNV <- read.table(
  "PC_WGS_208pairs_filt_js100.all_thresholded.by_genes.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, row.names = 1
)
CNV <- CNV[, -c(1:2)]
colnames(CNV) <- sub("_WGS", "", colnames(CNV))
fs <- read.table(
  "Fusion_freq_per_gene.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, row.names = 1
)
sv <- read.table(
  "SV_sample_summary.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, row.names = 1
)
hyper <- read.table(
  "DMR_hyper_summary.xls",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE
)
hypo <- read.table(
  "DMR_hypo_summary.xls",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE
)
ncd <- read.table(
  "Noncoding_summary.xls",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE
)

# The number of samples differs between alteration types, so `sp` is
# recomputed for each section below.

# Mutation / amplification / deletion: samples present in both tables,
# minus the two outliers.
sp <- intersect(colnames(mut), colnames(CNV))
sp <- sp[!(sp %in% c("T502", "T13"))]
gene <- pth_dt$Gene
sub_mut <- mut[intersect(rownames(mut), gene), sp, drop = FALSE]
sub_cnv <- CNV[intersect(rownames(CNV), gene), sp, drop = FALSE]
# Amplification: only the thresholded value 2 counts.
sub_amp <- sub_cnv
sub_amp[sub_amp != 2] <- 0
sub_amp[sub_amp != 0] <- 1
# Deletion: only the thresholded value -2 counts.
sub_del <- sub_cnv
sub_del[sub_del != -2] <- 0
sub_del[sub_del != 0] <- 1
write_alter_freq(sub_mut, "Mut", sp)
write_alter_freq(sub_amp, "Amp", sp)
write_alter_freq(sub_del, "Del", sp)

# SV
sp_file <- read.table(
  "samples_208.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
sp <- sp_file$Samples_208
sub_sv <- sv[intersect(rownames(sv), gene), sp, drop = FALSE]
write_alter_freq(sub_sv, "SV", sp)

# Fusion
sp_file <- read.table(
  "RNA_sample.xls",
  header = FALSE, sep = "\t", stringsAsFactors = FALSE
)
sp <- sp_file$V1
sp <- sp[!(sp %in% c("T37", "T274"))]
sub_fs <- fs[intersect(rownames(fs), gene), sp, drop = FALSE]
write_alter_freq(sub_fs, "FS", sp)

# DMR (hyper / hypo methylation)
sp <- colnames(hyper)
sp <- sp[!(sp %in% c("T37", "T274"))]
sub_hyper <- hyper[rownames(hyper) %in% gene, sp, drop = FALSE]
sub_hypo <- hypo[rownames(hypo) %in% gene, sp, drop = FALSE]
write_alter_freq(sub_hyper, "Hyper", sp)
write_alter_freq(sub_hypo, "Hypo", sp)

# Non-coding mutations
sp_file <- read.table(
  "samples_208.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
sp <- sp_file$Samples_208
sp <- sp[!(sp %in% c("T502", "T13"))]
sub_ncd <- ncd[intersect(rownames(ncd), gene), sp, drop = FALSE]
write_alter_freq(sub_ncd, "Ncd", sp)
# Calculate gene-level alteration summaries for the pathway genes: merge the
# per-type frequency tables into `pth_Dat` and derive combined categories
# (cnv, Methy, CPGEA_coding, CPGEA_noncoding, CPGEA_all).
# NOTE(review): the column names and denominators below hard-code the sample
# counts (206, 187, 134, 208, 186). They must match the "Alter(<n>)" names
# written by the upstream frequency step -- confirm whenever the sample sets
# change.
# `sp`: samples present in the mutation columns AND in (CNV or SV columns)
# AND in the hyper-methylation columns, minus T502. Within this section it is
# used only to restrict the coding sample lists.
sp=intersect(colnames(mut),intersect(c(colnames(CNV),colnames(sv)),colnames(hyper)))
sp=sp[sp!="T502"]
pth_dt=read.table("Pathway_gene_tab.xls",header=T,sep="\t",stringsAsFactors = F)
# Read one frequency table per alteration type (check.names=F keeps the
# parentheses in the column names).
mut.type=c("Mut","Amp","Del","FS","SV","Hyper","Hypo","Ncd")
file=paste(mut.type,"_sample_freq.xls",sep="")
mut_set=foreach(i=1:length(file)) %do% read.table(file[i],sep="\t",header=T,stringsAsFactors = F,check.names=F)
# Rename the generic "Alter" prefix to the alteration type, e.g.
# "Alter(206)_N" -> "Mut(206)_N".
for(i in 1:length(mut.type)){
colnames(mut_set[[i]])=sub("Alter",mut.type[i],colnames(mut_set[[i]]))
}
# Full outer merge of all tables on Gene; missing values become "".
pth_Dat=pth_dt
for(i in 1:length(mut_set)){
pth_Dat=merge(pth_Dat,mut_set[[i]],by="Gene",all = T)
}
pth_Dat[is.na(pth_Dat)]=""
# OG-amp, TSG-del: deletions are blanked for genes labelled "OG", "" or
# "Unkown"; amplifications are blanked for genes labelled "TSG", "" or
# "Unkown". ("Unkown" is matched literally against the OG_TSG column.)
pth_Dat$`Del(206)_Freq`[pth_Dat$OG_TSG=="OG"|pth_Dat$OG_TSG==""|pth_Dat$OG_TSG=="Unkown"]=""
pth_Dat$`Del(206)_Sample`[pth_Dat$OG_TSG=="OG"|pth_Dat$OG_TSG==""|pth_Dat$OG_TSG=="Unkown"]=""
pth_Dat$`Del(206)_N`[pth_Dat$OG_TSG=="OG"|pth_Dat$OG_TSG==""|pth_Dat$OG_TSG=="Unkown"]=""
pth_Dat$`Amp(206)_Freq`[pth_Dat$OG_TSG=="TSG"|pth_Dat$OG_TSG==""|pth_Dat$OG_TSG=="Unkown"]=""
pth_Dat$`Amp(206)_Sample`[pth_Dat$OG_TSG=="TSG"|pth_Dat$OG_TSG==""|pth_Dat$OG_TSG=="Unkown"]=""
pth_Dat$`Amp(206)_N`[pth_Dat$OG_TSG=="TSG"|pth_Dat$OG_TSG==""|pth_Dat$OG_TSG=="Unkown"]=""
# CNV: per gene, the union of the (remaining) Amp and Del sample lists.
# Pattern used for every combined category below: paste the sample strings,
# split on ",", de-duplicate, drop empty entries, then store the joined
# list, its length and length / denominator.
cnv_sp=foreach(i=1:nrow(pth_Dat)) %do% unique(unlist(strsplit(paste(unique(as.character(pth_Dat[i,c("Amp(206)_Sample","Del(206)_Sample")])),collapse=","),split=",")))
cnv_sp=foreach(i=1:length(cnv_sp)) %do% cnv_sp[[i]][cnv_sp[[i]]!=""]
pth_Dat$cnv_Sample=foreach(i=1:length(cnv_sp),.combine=c) %do% paste(cnv_sp[[i]],collapse=",")
pth_Dat$cnv_N=foreach(i=1:length(cnv_sp),.combine=c) %do% length(cnv_sp[[i]])
pth_Dat$cnv_Freq=foreach(i=1:length(cnv_sp),.combine=c) %do% length(cnv_sp[[i]])/206
colnames(pth_Dat)[grep("cnv",colnames(pth_Dat))]=sub("cnv","cnv(206)",colnames(pth_Dat)[grep("cnv",colnames(pth_Dat))])
# Methy: union of the Hyper and Hypo sample lists, denominator 187.
mth_sp=foreach(i=1:nrow(pth_Dat)) %do% unique(unlist(strsplit(paste(unique(as.character(pth_Dat[i,c("Hyper(187)_Sample","Hypo(187)_Sample")])),collapse=","),split=",")))
mth_sp=foreach(i=1:length(mth_sp)) %do% mth_sp[[i]][mth_sp[[i]]!=""]
pth_Dat$Methy_Sample=foreach(i=1:length(mth_sp),.combine=c) %do% paste(mth_sp[[i]],collapse=",")
pth_Dat$Methy_N=foreach(i=1:length(mth_sp),.combine=c) %do% length(mth_sp[[i]])
pth_Dat$Methy_Freq=foreach(i=1:length(mth_sp),.combine=c) %do% length(mth_sp[[i]])/187
colnames(pth_Dat)[grep("Methy",colnames(pth_Dat))]=sub("Methy","Methy(187)",colnames(pth_Dat)[grep("Methy",colnames(pth_Dat))])
# CPGEA_coding (mut + cnv + fusion): union of the Mut, Amp, Del and FS sample
# lists, restricted to the common sample set `sp`.
# NOTE(review): the denominator 186 is presumably length(sp) -- confirm.
cd_sp=foreach(i=1:nrow(pth_Dat)) %do% unique(unlist(strsplit(paste(unique(as.character(pth_Dat[i,c("Mut(206)_Sample","Amp(206)_Sample","Del(206)_Sample","FS(134)_Sample")])),collapse=","),split=",")))
cd_sp=foreach(i=1:length(cd_sp)) %do% intersect(cd_sp[[i]][cd_sp[[i]]!=""],sp)
pth_Dat$CPGEA_coding_Sample=foreach(i=1:length(cd_sp),.combine=c) %do% paste(cd_sp[[i]],collapse=",")
pth_Dat$CPGEA_coding_N=foreach(i=1:length(cd_sp),.combine=c) %do% length(cd_sp[[i]])
pth_Dat$CPGEA_coding_Freq=foreach(i=1:length(cd_sp),.combine=c) %do% length(cd_sp[[i]])/186
colnames(pth_Dat)[grep("CPGEA_coding",colnames(pth_Dat))]=sub("CPGEA_coding","CPGEA_coding(186)",colnames(pth_Dat)[grep("CPGEA_coding",colnames(pth_Dat))])
# CPGEA_noncoding (sv + ncd + methy): union of the SV, Ncd and Methy sample
# lists.
# NOTE(review): unlike the coding list above, this list is NOT intersected
# with `sp`, yet it is divided by 186 -- confirm this is intended.
ncd_sp=foreach(i=1:nrow(pth_Dat)) %do% unique(unlist(strsplit(paste(unique(as.character(pth_Dat[i,c("SV(208)_Sample","Ncd(206)_Sample","Methy(187)_Sample")])),collapse=","),split=",")))
ncd_sp=foreach(i=1:length(ncd_sp)) %do% ncd_sp[[i]][ncd_sp[[i]]!=""]
pth_Dat$CPGEA_noncoding_Sample=foreach(i=1:length(ncd_sp),.combine=c) %do% paste(ncd_sp[[i]],collapse=",")
pth_Dat$CPGEA_noncoding_N=foreach(i=1:length(ncd_sp),.combine=c) %do% length(ncd_sp[[i]])
pth_Dat$CPGEA_noncoding_Freq=foreach(i=1:length(ncd_sp),.combine=c) %do% length(ncd_sp[[i]])/186
colnames(pth_Dat)[grep("CPGEA_noncoding",colnames(pth_Dat))]=sub("CPGEA_noncoding","CPGEA_noncoding(186)",colnames(pth_Dat)[grep("CPGEA_noncoding",colnames(pth_Dat))])
# CPGEA_all, denominator 206.
# NOTE(review): the original heading described this as coding + noncoding,
# but the code unions only the Mut and cnv sample lists -- confirm which one
# is intended.
all_sp=foreach(i=1:nrow(pth_Dat)) %do% unique(unlist(strsplit(paste(unique(as.character(pth_Dat[i,c("Mut(206)_Sample","cnv(206)_Sample")])),collapse=","),split=",")))
all_sp=foreach(i=1:length(all_sp)) %do% all_sp[[i]][all_sp[[i]]!=""]
pth_Dat$CPGEA_all_Sample=foreach(i=1:length(all_sp),.combine=c) %do% paste(all_sp[[i]],collapse=",")
pth_Dat$CPGEA_all_N=foreach(i=1:length(all_sp),.combine=c) %do% length(all_sp[[i]])
pth_Dat$CPGEA_all_Freq=foreach(i=1:length(all_sp),.combine=c) %do% length(all_sp[[i]])/206
colnames(pth_Dat)[grep("CPGEA_all",colnames(pth_Dat))]=sub("CPGEA_all","CPGEA_all(206)",colnames(pth_Dat)[grep("CPGEA_all",colnames(pth_Dat))])
# Primary / metastatic: attach the per-gene TCGA (primary) and SU2C
# (metastatic) summaries to the gene table and write the combined result.

# Primary cohort: relabel the "All_alter" columns and expose the row names
# as a Gene column for merging.
Pri_tab <- read.table(
  "TCGA_Primary_pathway_summary.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
colnames(Pri_tab) <- sub("All_alter", "Primary_TCGA_CP(114)", colnames(Pri_tab))
Pri_tab$Gene <- rownames(Pri_tab)

# Metastatic cohort: same treatment.
Meta_tab <- read.table(
  "Su2c_Metastic_pathway_summary.xls",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
colnames(Meta_tab) <- sub("All_alter", "Metastic_Su2c(150)", colnames(Meta_tab))
Meta_tab$Gene <- rownames(Meta_tab)

# Left joins: every row of pth_Dat is kept; genes missing from a cohort get
# empty strings.
pri_cols <- c(
  "Gene", "Primary_TCGA_CP(114)_sample", "Primary_TCGA_CP(114)_freq"
)
meta_cols <- c(
  "Gene", "Metastic_Su2c(150)_sample", "Metastic_Su2c(150)_freq"
)
pth_Dat <- merge(pth_Dat, Pri_tab[, pri_cols], by = "Gene", all.x = TRUE)
pth_Dat <- merge(pth_Dat, Meta_tab[, meta_cols], by = "Gene", all.x = TRUE)
pth_Dat[is.na(pth_Dat)] <- ""

write.table(
  pth_Dat, "Pathwaylist_CPGEA_TCGA_Su2c_gene_freq_v2.xls",
  sep = "\t", quote = FALSE, row.names = FALSE
)
library(foreach)
# Inputs for the pathway-level summary: the sample list, the gene-level
# table, the genes assigned to at least one pathway, and the pathway IDs.
sp <- read.table(
  "Pathway_sp.xls",
  header = FALSE, sep = "\t", stringsAsFactors = FALSE
)[, 1]
pth <- read.table(
  "Pathwaylist_CPGEA_TCGA_Su2c_gene_freq_v2.xls",
  header = TRUE, sep = "\t", row.names = 1,
  check.names = FALSE, stringsAsFactors = FALSE
)
has_pathway <- pth$pathwaylist != ""
sub_pth <- pth[has_pathway, ]
# A gene can belong to several pathways, separated by ";".
pathway_strings <- unique(sub_pth$pathwaylist[sub_pth$pathwaylist != ""])
pth_id <- unique(unlist(strsplit(pathway_strings, split = ";")))
# Build a pathway-by-sample table marking which samples carry an alteration
# in which pathway.
#
# colnm : character vector of two column names of the global `sub_pth`;
#         the first column holds comma-separated sample IDs, the second
#         holds ";"-separated pathway IDs.
# type  : label written into every (pathway, sample) cell that has at least
#         one altered gene.
#
# Reads the globals `sub_pth` (gene table), `sp` (sample IDs, used as column
# levels) and `pth_id` (pathway IDs, used as row levels). Samples or pathways
# outside those levels are ignored.
#
# Returns a table with pathways in rows and samples in columns; cells with an
# alteration hold `type`, all other cells hold 0.
mt_tab <- function(colnm, type) {
  sub_mt <- sub_pth[, colnm]
  # Keep only genes that have at least one altered sample.
  keep <- !is.na(sub_mt[, 1]) & sub_mt[, 1] != ""
  sub_mt <- sub_mt[keep, ]

  sample_lists <- strsplit(as.character(sub_mt[, 1]), split = ",")
  pathway_lists <- strsplit(as.character(sub_mt[, 2]), split = ";")

  # One row per (sample, pathway) combination of each gene. Splitting every
  # pathway string up front avoids the special case for strings containing
  # ";": with no such string, removing rows by an empty negative index used
  # to drop ALL rows.
  pairs <- Map(
    function(s, p) {
      data.frame(
        Sample = rep(s, times = length(p)),
        pathway = rep(p, each = length(s)),
        stringsAsFactors = FALSE
      )
    },
    sample_lists, pathway_lists
  )
  if (length(pairs) > 0) {
    mt_pth <- unique(do.call(rbind, pairs))
  } else {
    # No altered gene at all: produce an all-zero table instead of failing.
    mt_pth <- data.frame(
      Sample = character(0), pathway = character(0),
      stringsAsFactors = FALSE
    )
  }

  # Fixed factor levels give every table the same shape and ordering.
  mt_pth$Sample <- factor(mt_pth$Sample, levels = sp)
  mt_pth$pathway <- factor(mt_pth$pathway, levels = pth_id)
  flags <- table(mt_pth)
  flags[flags != 0] <- type
  t(flags)
}
# Pathway-level summary: for every pathway and sample, record whether the
# pathway carries a coding change, a non-coding change, or both, and count
# each state per pathway.
ncd_tab <- mt_tab(c("CPGEA_noncoding(186)_Sample", "pathwaylist"), "Noncoding_change")
cd_tab <- mt_tab(c("CPGEA_coding(186)_Sample", "pathwaylist"), "Coding_change")

# Number of samples; it replaces the former hard-coded 186 so the code keeps
# working if the sample list changes (identical results for 186 samples).
n_sp <- length(sp)

# Combine both tables cell by cell as "<coding>;<noncoding>". Both tables
# share the same row (pathway) and column (sample) order, so an element-wise
# paste is sufficient.
pth_cells <- matrix(
  paste(cd_tab, ncd_tab, sep = ";"),
  nrow = nrow(cd_tab), ncol = n_sp
)
colnames(pth_cells) <- sp
rownames(pth_cells) <- rownames(cd_tab)
# Strip the "0" placeholders: "0;X" -> "X", "X;0" -> "X", "0;0" -> "0".
pth_cells <- gsub("0;", "", pth_cells)
pth_cells <- gsub(";0", "", pth_cells)

# Number of samples per pathway whose cell equals `state`.
count_state <- function(state) as.integer(rowSums(pth_cells == state))

pth_tab <- as.data.frame(pth_cells)
#pth_N=foreach(j=1:nrow(pth_tab),.combine = rbind) %do% table(as.character(as.matrix(pth_tab[j,])))
pth_tab$Coding_and_Noncoding_N <- count_state("Coding_change;Noncoding_change")
pth_tab$Coding_and_Noncoding_Freq <- pth_tab$Coding_and_Noncoding_N / n_sp
pth_tab$Coding_change_N <- count_state("Coding_change")
pth_tab$Coding_change_Freq <- pth_tab$Coding_change_N / n_sp
pth_tab$Noncoding_change_N <- count_state("Noncoding_change")
pth_tab$Noncoding_change_Freq <- pth_tab$Noncoding_change_N / n_sp
# Put the six summary columns in front of the per-sample columns.
pth_tab <- pth_tab[, c(n_sp + 1:6, seq_len(n_sp))]
#write.table(pth_tab,"Pathway_coding_noncoding_change_Arv_v2.xls",sep="\t",quote=F)
# Pathway-level alteration matrices for the primary (TCGA) and metastatic
# (SU2C) cohorts: one row per pathway, one 0/1 column per sample, plus the
# number and fraction of altered samples.

# Build the pathway-by-sample alteration table for one cohort.
#
# gene_tab   : gene-level table with a ";"-separated `pathwaylist` column.
# pathways   : pathway IDs (rows of the result).
# sample_col : name of the column of `gene_tab` holding comma-separated
#              altered sample IDs for the cohort.
# samples    : sample IDs of the cohort (columns of the result).
#
# Returns a data frame with one 0/1 column per sample followed by N_Sample
# (altered samples per pathway) and Freq (N_Sample / number of samples).
pathway_alter_matrix <- function(gene_tab, pathways, sample_col, samples) {
  out <- matrix(0, nrow = length(pathways), ncol = length(samples))
  rownames(out) <- pathways
  colnames(out) <- samples
  # Pathway membership is matched on the exact ";"-separated IDs. A regex
  # search of the raw string would also hit pathways whose name merely
  # contains the ID (or misread IDs containing regex metacharacters).
  membership <- strsplit(as.character(gene_tab$pathwaylist), split = ";")
  for (i in seq_along(pathways)) {
    in_pathway <- vapply(
      membership, function(ids) pathways[i] %in% ids, logical(1)
    )
    altered <- unique(unlist(strsplit(gene_tab[in_pathway, sample_col], split = ",")))
    out[i, altered] <- 1
  }
  out <- as.data.frame(out)
  out$N_Sample <- rowSums(out)
  out$Freq <- out$N_Sample / length(samples)
  out
}

west_tab <- read.table(
  "Pathwaylist_CPGEA_TCGA_Su2c_gene_freq_v2.xls",
  sep = "\t", check.names = FALSE, header = TRUE,
  stringsAsFactors = FALSE, row.names = 1
)
# `check.names` is spelled out in full; the abbreviated form relied on
# partial argument matching.
Pri <- read.table(
  "TCGA_Primary_CNV_summary.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE
)
Meta <- read.table(
  "Su2c_Metastic_CNV_summary.xls",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE, check.names = FALSE
)
pth <- unique(unlist(strsplit(
  unique(west_tab$pathwaylist[west_tab$pathwaylist != ""]),
  split = ";"
)))

# The first 114 / 150 columns of the CNV summaries supply the sample IDs.
# The per-pathway sample lists stay local to the helper, so the global `sp`
# sample list is no longer overwritten by this section.
Pri_dt <- pathway_alter_matrix(
  west_tab, pth, "Primary_TCGA_CP(114)_sample", colnames(Pri)[1:114]
)
#write.table(Pri_dt[,c(115:116,1:114)],"Primary_pathway_alter_114_Arv_v2.xls",sep="\t",quote=F)
Meta_dt <- pathway_alter_matrix(
  west_tab, pth, "Metastic_Su2c(150)_sample", colnames(Meta)[1:150]
)
#write.table(Meta_dt[,c(151:152,1:150)],"Metastic_pathway_alter_150_Arv_v2.xls",sep="\t",quote=F)