This is the R Markdown for Extended Data Fig. 3, which consists of six parts.

Prepare Data:

# Read the thresholded per-gene copy-number table (genes in rows, first
# column used as row names) and drop the two leading annotation columns so
# that only per-sample columns remain.
CNV <- read.table(
  "PC_WGS_208pairs_filt_js100.all_thresholded.by_genes.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE,
  row.names = 1
)
CNV <- CNV[, -(1:2)]
# Strip the "_WGS" suffix so column names can be used as sample IDs.
colnames(CNV) <- sub("_WGS", "", colnames(CNV))

# One-row table for CHD1, recoded to a 0/1 flag: calls equal to -2 become 1,
# every other call becomes 0 (this column is later renamed CHD1_DEL).
my_cnv <- CNV["CHD1", ]
is_minus_two <- my_cnv == -2
my_cnv[!is_minus_two] <- 0
my_cnv[is_minus_two] <- 1
# fusion
# Build a per-sample ETS fusion flag from the validated fusion calls and
# combine it with the CHD1 flag (my_cnv) into mut_fs: one row per sample,
# columns CHD1 and ETS, both coded 0/1.
fs_ct <- read.table(
  "Fusion_sampleID_genepair_validate_134_Arv.xls",
  sep = "\t", stringsAsFactors = FALSE, header = TRUE
)
fs <- fs_ct

# Every Fusion entry must split into exactly two partners on "--";
# anything else would silently misalign Gene1/Gene2 with their samples.
fs_parts <- strsplit(fs$Fusion, split = "--")
stopifnot(all(lengths(fs_parts) == 2L))
fs_gene <- unlist(fs_parts)
fs$Gene1 <- vapply(fs_parts, `[`, character(1), 1L)
fs$Gene2 <- vapply(fs_parts, `[`, character(1), 2L)

# Long table (gene, sample) with both fusion partners, then a gene x sample
# count table.
fs_sp <- data.frame(Gene = c(fs$Gene1, fs$Gene2), Sample = c(fs$Sample, fs$Sample))
fs_sp$Sample <- as.character(fs_sp$Sample)
fs_tab <- table(fs_sp)

# Samples present in the CNV table but without any fusion call get
# all-zero columns, so that every CNV sample has a column.
no_fusion_samples <- setdiff(names(my_cnv), colnames(fs_tab))
fs_oth <- matrix(0, ncol = length(no_fusion_samples), nrow = nrow(fs_tab))
colnames(fs_oth) <- no_fusion_samples
fs_Tab <- as.data.frame(cbind(fs_tab, fs_oth))

# Restrict to, and order by, the CNV samples.
fs_Tab <- fs_Tab[, names(my_cnv)]

# Per-sample number of fusion calls involving any ETS gene. The sum runs
# over whichever ETS genes actually occur in the fusion table, instead of a
# fixed row range that dropped FLI1 when all four genes were present and
# picked up an NA row when fewer than three were.
ets_genes <- c("ERG", "ETV1", "ETV4", "FLI1")
ets_present <- intersect(ets_genes, rownames(fs_Tab))
ets_calls <- colSums(fs_Tab[ets_present, names(my_cnv), drop = FALSE])

mut_fs <- rbind(my_cnv, ets_calls)
rownames(mut_fs) <- c(rownames(my_cnv)[1], "ETS")

# Samples in rows; collapse counts to a 0/1 flag.
mut_fs <- as.data.frame(t(mut_fs))
mut_fs[mut_fs != 0] <- 1

# Add chromoplexy / chromothripsis flags to mut_fs.
# Both input tables are read without a header; their first column is used
# as the list of affected sample IDs.
chromoplexy <- read.table(
  "samples.chromoplexy_208.xls",
  header = FALSE, stringsAsFactors = FALSE, sep = "\t"
)
chromothripsis <- read.table(
  "shatterseek-chromotrhiprisis_208.xls",
  header = FALSE, stringsAsFactors = FALSE, sep = "\t"
)

chrpxy <- chromoplexy[[1]]
chrth <- chromothripsis[[1]]

# Default every sample to 0, then flag the listed samples by row name.
mut_fs[["chromoplexy"]] <- 0
mut_fs[["chromothripsis"]] <- 0

# Assigning by row name appends a new row for any ID that is not already a
# row of mut_fs; the NA cells this creates are zero-filled below.
mut_fs[chrpxy, "chromoplexy"] <- 1
mut_fs[chrth, "chromothripsis"] <- 1
mut_fs[["Sample"]] <- rownames(mut_fs)
mut_fs[is.na(mut_fs)] <- 0
# The first column holds the CHD1 flag.
names(mut_fs)[1] <- "CHD1_DEL"

# Refine the ETS column of mut_fs into categories and export the table.
# Final ETS coding written to the output file:
#   0 = no call, 2 = fusion flag carried over from the earlier 0/1 ETS column,
#   3 = SV and fusion, 4 = SV only, 5 = sample listed in high_express.
sv_dt <- read.delim("Gene_pair_sv_annotation_TAD_summary_208.xls", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
fs_dt <- read.delim("Fusion_fusionhub_sv_validated_Arv.xls", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
gene <- c("ERG", "ETV1", "ETV4", "FLI1")

# Split delimiter-separated sample fields into a unique vector of IDs,
# ignoring missing fields so that no NA ends up being used as a row index.
split_samples <- function(x, sep) {
  x <- as.character(x)
  unique(unlist(strsplit(x[!is.na(x)], split = sep)))
}

# Samples in which an SV (sv_sp) / a fusion (fs_sp) was called, per ETS gene.
# `%in%` and grepl() are used instead of `==` so that NA gene names are
# treated as "no match" rather than producing NA subscripts.
sv_sp <- list()
fs_sp <- list()
sv_sp[[1]] <- split_samples(sv_dt$Sample[sv_dt$Pair1_Gene %in% "ERG" | sv_dt$Pair2_Gene %in% "ERG"], ",")
fs_sp[[1]] <- split_samples(fs_dt$Sample[fs_dt$Fusion %in% "TMPRSS2--ERG"], ";")
sv_sp[[2]] <- split_samples(sv_dt$Sample[sv_dt$Pair1_Gene %in% "ETV1" | sv_dt$Pair2_Gene %in% "ETV1"], ",")
fs_sp[[2]] <- split_samples(fs_dt$Sample[grepl("ETV1", fs_dt$Fusion)], ";")
sv_sp[[3]] <- split_samples(sv_dt$Sample[grepl("ETV4", sv_dt$Pair1_Gene) | grepl("ETV4", sv_dt$Pair2_Gene)], ",")
fs_sp[[3]] <- split_samples(fs_dt$Sample[grepl("ETV4", fs_dt$Fusion)], ";")
sv_sp[[4]] <- split_samples(sv_dt$Sample[sv_dt$Pair1_Gene %in% "FLI1" | sv_dt$Pair2_Gene %in% "FLI1"], ",")
fs_sp[[4]] <- split_samples(fs_dt$Sample[grepl("FLI1", fs_dt$Fusion)], ";")

SV_sp <- unique(unlist(sv_sp))
FS_sp <- unique(unlist(fs_sp))
SV_FS <- intersect(SV_sp, FS_sp)
high_express <- c("T729", "T34", "T273", "T47", "T84")
#mut_fs=read.table("CHD1_ETS_data.xls",sep='\t',header = T,stringsAsFactors = F)
rownames(mut_fs) <- mut_fs$Sample

# Only recode samples that are already rows of mut_fs. Assigning to an
# unknown row name would append a row whose Sample and other columns are NA,
# and that row would be written to the output file without an ID.
known_samples <- rownames(mut_fs)
unknown_samples <- setdiff(c(SV_sp, high_express), known_samples)
if (length(unknown_samples) > 0) {
  warning(
    "Samples not present in mut_fs were ignored: ",
    paste(unknown_samples, collapse = ", "),
    call. = FALSE
  )
}

# Later assignments deliberately override earlier ones (5 wins over 3/4,
# which win over 2).
mut_fs$ETS[mut_fs$ETS == 1] <- 2
mut_fs[intersect(SV_FS, known_samples), "ETS"] <- 3
mut_fs[intersect(setdiff(SV_sp, FS_sp), known_samples), "ETS"] <- 4
mut_fs[intersect(high_express, known_samples), "ETS"] <- 5
write.table(mut_fs, "CHD1_ETS_fs_sv_highexp_data.xls", sep = "\t", row.names = FALSE, quote = FALSE)

# Extended Fig2c data
# Extract the TAD genes around one inversion breakpoint, drop genes whose
# median is below 1.5 in all three groups, attach the TAD gene annotation
# and export the result.
sv_bp_tad <- read.delim("SV_breakpoint_TAD_gene_208_DESeq2.xls", header = TRUE, stringsAsFactors = FALSE)
dt <- sv_bp_tad[grep("Inversion-", sv_bp_tad$sv_break_point), ]
sub_dt <- unique(dt[dt$sv_break_point == "Inversion-15:25753330-15:54385418", ])

# Rows to discard: below 1.5 in WT, MUT and Normal simultaneously.
# A logical keep-mask is used instead of `-which(...)`: when no row matches,
# `-which(...)` is a zero-length index and would drop every row. Rows where
# the condition is NA are kept, as which() also ignored them.
all_low <- sub_dt$TAD_WT_median < 1.5 &
  sub_dt$TAD_MUT_median < 1.5 &
  sub_dt$TAD_Normal_median < 1.5
sub_dt <- sub_dt[!(all_low %in% TRUE), ]

tad_out <- read.table("TAD_gene_annotation_new.xls", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
# Rename the join keys and strip the "chr" prefix so TAD_region matches the
# values used in sub_dt.
colnames(tad_out)[c(1, 8)] <- c("TAD_region", "TAD_Gene")
tad_out$TAD_region <- sub("chr", "", tad_out$TAD_region)

# NOTE(review): columns are selected by position; this assumes the column
# layout of both input files is unchanged - confirm if the inputs are rebuilt.
tad_dt <- merge(
  sub_dt[, c(1, 27:28, 30:40, 43:44)],
  tad_out[, c(1, 8, 11)],
  by = c("TAD_region", "TAD_Gene")
)
write.table(tad_dt, "SV_TAD_figure_DESeq2_208.xls", sep = "\t", row.names = FALSE, quote = FALSE)

Figure a:

library(ggplot2)
library(data.table)

# Panel a: stacked bar chart of SV counts per sample (top) aligned with a
# heatmap of CHD1 deletion / ETS status / chromoplexy / chromothripsis
# (bottom). Samples are ordered by decreasing total SV count.
sv_dt <- read.table("SV_sample_208.xls", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
sample_order <- rownames(sv_dt)[order(rowSums(sv_dt), decreasing = TRUE)]
sv_dt$Sample <- rownames(sv_dt)
# Columns 1-5 are melted in reverse order; column 6 is the Sample column
# added above.
sv_melt <- melt(sv_dt[, c(5:1, 6)])

sv_melt$Sample <- factor(sv_melt$Sample, levels = sample_order, ordered = TRUE)

sv_bar <- ggplot(sv_melt, aes(x = Sample, y = value)) +
  geom_bar(stat = "identity", aes(fill = factor(variable))) +
  labs(y = "SV", x = "") +
  guides(fill = guide_legend(title = "", keywidth = .7, keyheight = .7, label.theme = element_text(size = 12, family = "Helvetica", angle = 0))) +
  scale_fill_manual(values = rev(c("#2166ac", "#92c5de", "#fddbc7", "#d6604d", "#b2182b"))) +
  theme(
    panel.background = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x = element_blank(),
    axis.title.x = element_blank(), legend.position = "right", panel.grid = element_blank()
  ) +
  geom_hline(aes(yintercept = 0))

## heatmap
mut_fs <- read.table("CHD1_ETS_fs_sv_highexp_data.xls", sep = "\t", header = TRUE, stringsAsFactors = FALSE)

# Column 5 is used as the id column (Sample); the other four are the
# heatmap rows.
mfs_melt <- data.table::melt(mut_fs[, c(5, 2, 1, 4, 3)])

mfs_melt$Sample <- factor(mfs_melt$Sample, levels = sample_order, ordered = TRUE)
mfs_melt$value <- as.factor(mfs_melt$value)

# Codes 0..5 in the order of the colours and labels below. Fixing the scale
# limits keeps each code tied to its colour and label even when a code does
# not occur in the data; without it the unnamed values/labels are matched
# positionally against whichever codes happen to be present.
status_codes <- as.character(0:5)

mfs_ht <- ggplot(mfs_melt) +
  geom_tile(aes(x = Sample, y = variable, fill = value), color = "grey80") +
  scale_fill_manual(
    values = c("white", "black", "#00770096", "#005CCC96", "#D55E0096", "darkblue"),
    labels = c("WT", "YES", "Fusion", "SV;Fusion", "SV", "Over_expression"),
    limits = status_codes
  ) +
  labs(y = "", x = "") +
  guides(fill = guide_legend(title = "", keywidth = .7, keyheight = .7, label.theme = element_text(size = 12, family = "Helvetica", angle = 0))) +
  theme(
    panel.background = element_rect(fill = NA), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line.x = element_blank(),
    axis.title.x = element_blank(), legend.position = "right"
  ) +
  geom_hline(aes(yintercept = 0))

# merge: stack the two panels vertically with aligned x axes
outplot <- cowplot::plot_grid(sv_bar, mfs_ht, ncol = 1, align = "v", rel_heights = c(1, 0.25))
#pdf("Extended_Figure2a_208_v3.pdf",height=5,width=20)
print(outplot)

#dev.off()

Figure b:

library(foreach)

# Panel b: one bar plot per SV type showing the gene pairs whose alteration
# frequency exceeds a per-type threshold.
path <- "sub_svtype_annotation_208"
file <- dir(path)
# One table per file in `path`, in dir() order.
dt <- lapply(file, function(f) {
  read.delim(file.path(path, f), sep = "\t", stringsAsFactors = FALSE, header = TRUE)
})

type <- sub("_Gene_pair_sv_annotation_summary_208.xls", "", file)
# NOTE(review): `type` is sorted here while `dt` stays in dir() order; the
# two only line up if sorting the stripped names leaves the order unchanged -
# confirm for the actual file names.
type <- type[order(type)]
# Per-panel frequency threshold and bottom margin, indexed like `dt`.
freq <- c(0.075, 0.005, 0.05, 0.02, 0.05)
mar1 <- c(10, 10, 15, 10, 15)
# One colour per panel.
panel_col <- RColorBrewer::brewer.pal(8, name = "Set2")[c(1, 5, 2, 6, 7)]

#pdf("Supp_Figure5a_208.pdf",width = 14,height=9)
# Never index past the available tables or the per-panel settings.
for (i in seq_len(min(length(dt), length(freq)))) {
  hub_gene_pair <- dt[[i]][dt[[i]]$Freq > freq[i], ]
  # Keep columns 1 and 4 (the code below uses them as Gene_Pair and the bar
  # heights), sorted by decreasing frequency.
  hub_sv_ty <- hub_gene_pair[order(hub_gene_pair$Freq, decreasing = TRUE), c(1, 4)]
  if (nrow(hub_sv_ty) == 0L) {
    # barplot() cannot draw an empty vector; skip instead of aborting the loop.
    message("No gene pair above the frequency threshold for ", type[i], "; panel skipped")
    next
  }
  rownames(hub_sv_ty) <- hub_sv_ty$Gene_Pair
  pair_names <- rownames(hub_sv_ty)
  #pdf(paste(type[i],"Gene_pair_0.05_sv_type_new.pdf"),height=10,width=30/44*nrow(hub_sv_ty))
  par(mar = c(mar1[i], 4, 4, 4))

  # Panels 2 and 3 use a fixed x range; NULL is barplot()'s own default.
  bar_xlim <- if (i == 2 || i == 3) c(0, 10) else NULL
  bar <- barplot(
    hub_sv_ty[, -1],
    beside = FALSE,
    col = panel_col[i],
    ylab = "Frequency of Alteration",
    names.arg = rep("", nrow(hub_sv_ty)),
    xlim = bar_xlim
  )

  if (i == 3) {
    # Third label is hard-coded; this branch expects exactly four bars.
    text(bar + 0.5, -0.001, labels = c(pair_names[1:2], "GUSBP11--GGT1", pair_names[4]), xpd = TRUE, srt = 45, pos = 2)
  } else if (i == 5) {
    # Label with the first ";"-separated gene pair.
    first_pair <- vapply(
      pair_names,
      function(nm) strsplit(nm, split = ";")[[1]][1],
      character(1),
      USE.NAMES = FALSE
    )
    text(bar + 0.5, -0.001, labels = first_pair, xpd = TRUE, srt = 45, pos = 2)
  } else {
    # Label with the first gene of the first ";"-separated gene pair.
    first_gene <- vapply(
      pair_names,
      function(nm) strsplit(strsplit(nm, split = ";")[[1]], split = "--")[[1]][1],
      character(1),
      USE.NAMES = FALSE
    )
    text(bar + 0.5, -0.001, labels = first_gene, xpd = TRUE, srt = 90, pos = 2)
  }
  legend("topright", legend = type[i], col = panel_col[i], pch = 15, bty = "n")
}