This is the R Markdown for Supplementary Table 7, which consists of 1 part.

SV annotation Tier:

# Load the SV calls and strip the "_WGS" suffix from the sample names.
sv <- read.delim("SV.meerkat.Results_2_208.xls", sep = "\t", header = TRUE,
                 stringsAsFactors = FALSE)
sv$Sample <- sub("_WGS", "", sv$Sample)

#enhancer annotation
# Columns 3:4 and 5:6 hold the chromosome/position of each breakpoint; the
# chromosome gets a "chr" prefix before it is written out.
enh1 <- sv[, 3:4]
enh1$Chr1 <- paste0("chr", enh1$Chr1)
enh2 <- sv[, 5:6]
enh2$Chr2 <- paste0("chr", enh2$Chr2)

# One row per distinct breakpoint: chromosome, position, position
# (the position is used as both start and end), no header, tab separated.
bed1 <- unique(enh1[, c(1, 2, 2)])
bed2 <- unique(enh2[, c(1, 2, 2)])
write.table(bed1, "pos1.xls", sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = FALSE)
write.table(bed2, "pos2.xls", sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = FALSE)

#shell command line
# Run combine.awk over the GENCODE v27 HGNC metadata and replace every space in
# its output with a tab; the result is read back in R as the id lookup table.
# NOTE(review): combine.awk is not part of this file - confirm what it emits.
awk -f combine.awk gencode.v27.metadata.HGNC/data|sed 's/ /\t/g' >Gencode_id.file

#pos1 annotation
# Intersect the pair-1 breakpoints (pos1.xls, 3 columns) with every hg38
# annotation track. With -wa -wb the track's own columns start at field 4, so
# each output row is "<f4>_<f5>_<f6>", breakpoint chr, breakpoint pos, followed by
# the identifier field(s) picked for that track. Rows are de-duplicated and the
# spaces awk prints between fields are turned into tabs.
mkdir pos1

# GeneHancer: ';' and '=' are turned into tabs first so the attribute values
# become separate awk fields.
bedtools intersect -a pos1.xls -b hg38_annotation/genehancer.v4.7.bed/data -wa -wb \
  | sed 's/;/\t/g' | sed 's/=/\t/g' \
  | awk '{print $4"_"$5"_"$6,$1,$2,$10,$12}' \
  | sort | uniq | sed 's/ /\t/g' > pos1/pos1.Enhancer

# Core promoters: two identifier fields are kept (field 8, then field 7).
bedtools intersect -a pos1.xls -b hg38_annotation/gencode.v27.promoterCore.bed/data -wa -wb \
  | awk '{print $4"_"$5"_"$6,$1,$2,$8,$7}' \
  | sort | uniq | sed 's/ /\t/g' > pos1/pos1.promotercore

# 3'UTR, 5'UTR, exon and intron tracks share one layout (identifier in field 7).
for feat in 3utr 5utr exon intron; do
  bedtools intersect -a pos1.xls -b hg38_annotation/gencode.v27.${feat}.bed/data -wa -wb \
    | awk '{print $4"_"$5"_"$6,$1,$2,$7}' \
    | sort | uniq | sed 's/ /\t/g' > pos1/pos1.${feat}
done

# Intergenic regions: identifier taken from field 10.
bedtools intersect -a pos1.xls -b hg38_annotation/IGR2.1/data -wa -wb \
  | awk '{print $4"_"$5"_"$6,$1,$2,$10}' \
  | sort | uniq | sed 's/ /\t/g' > pos1/pos1.IGR

#pos2 annotation
# Same intersections as for pair 1, applied to the pair-2 breakpoints (pos2.xls):
# each output row is "<f4>_<f5>_<f6>", breakpoint chr, breakpoint pos and the
# identifier field(s) of the track, de-duplicated and tab separated.
mkdir pos2

# GeneHancer: ';' and '=' become tabs so the attribute values are separate fields.
bedtools intersect -a pos2.xls -b hg38_annotation/genehancer.v4.7.bed/data -wa -wb \
  | sed 's/;/\t/g' | sed 's/=/\t/g' \
  | awk '{print $4"_"$5"_"$6,$1,$2,$10,$12}' \
  | sort | uniq | sed 's/ /\t/g' > pos2/pos2.Enhancer

# Core promoters: two identifier fields are kept (field 8, then field 7).
bedtools intersect -a pos2.xls -b hg38_annotation/gencode.v27.promoterCore.bed/data -wa -wb \
  | awk '{print $4"_"$5"_"$6,$1,$2,$8,$7}' \
  | sort | uniq | sed 's/ /\t/g' > pos2/pos2.promotercore

# 3'UTR, 5'UTR, exon and intron tracks share one layout (identifier in field 7).
for feat in 3utr 5utr exon intron; do
  bedtools intersect -a pos2.xls -b hg38_annotation/gencode.v27.${feat}.bed/data -wa -wb \
    | awk '{print $4"_"$5"_"$6,$1,$2,$7}' \
    | sort | uniq | sed 's/ /\t/g' > pos2/pos2.${feat}
done

# Intergenic regions: identifier taken from field 10.
bedtools intersect -a pos2.xls -b hg38_annotation/IGR2.1/data -wa -wb \
  | awk '{print $4"_"$5"_"$6,$1,$2,$10}' \
  | sort | uniq | sed 's/ /\t/g' > pos2/pos2.IGR

#transform trans to gene symbol
library(foreach)
# Lookup table written by the shell step: rows are addressed by the identifier
# in the first column, and the chunks below read column 2 as the replacement.
id_file <- read.table("Gencode_id.file", sep = "\t", stringsAsFactors = FALSE)
rownames(id_file) <- id_file$V1

#pair1 Gene
# Collect the per-track intersect tables for the pair-1 breakpoints, label every
# hit as "<track>(<gene>)" and write one (location, label) row per distinct hit.
#
# Only the bedtools outputs ("pos1.<track>") are read, so a Pos1_gene_region.bed
# left in the folder by an earlier run does not disturb the processing.
file <- dir("pos1", pattern = "^pos1\\.")
rg <- sub("pos1.", "", file, fixed = TRUE)
dt <- lapply(file.path("pos1", file), read.table,
             sep = "\t", stringsAsFactors = FALSE)

# The Enhancer and promotercore tables already carry the name to report in
# column 5. They are recognised by track name rather than by their position in
# the directory listing, which depends on sort order and folder content.
has_symbol <- rg %in% c("Enhancer", "promotercore")

for (i in seq_along(dt)) {
  if (has_symbol[i]) {
    colnames(dt[[i]])[5] <- "Gene"
  } else {
    # Translate the identifier in V4 through id_file; identifiers without an
    # entry are kept unchanged.
    symbol <- id_file[dt[[i]]$V4, 2]
    unmapped <- is.na(symbol)
    symbol[unmapped] <- dt[[i]]$V4[unmapped]
    dt[[i]]$Gene <- symbol
  }
  dt[[i]]$Gene <- paste0(rg[i], "(", dt[[i]]$Gene, ")")
  # keep breakpoint chr (V2), breakpoint pos (V3) and the label
  dt[[i]] <- unique(dt[[i]][, c(2:3, 5)])
}

pos1_gn <- as.data.frame(do.call(rbind, dt))
pos1_gn$pair1_loc <- paste(pos1_gn$V2, pos1_gn$V3, sep = "_")
pos1_gn <- pos1_gn[, c("pair1_loc", "Gene")]
write.table(pos1_gn[order(pos1_gn$pair1_loc), ], "Pos1_Gene.xls", sep = "\t",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

#pair2 Gene
# Collect the per-track intersect tables for the pair-2 breakpoints, label every
# hit as "<track>(<gene>)" and write one (location, label) row per distinct hit.
#
# Only the bedtools outputs ("pos2.<track>") are read, so a Pos2_gene_region.bed
# left in the folder by an earlier run does not disturb the processing.
file <- dir("pos2", pattern = "^pos2\\.")
rg <- sub("pos2.", "", file, fixed = TRUE)
dt <- lapply(file.path("pos2", file), read.table,
             sep = "\t", stringsAsFactors = FALSE)

# The Enhancer and promotercore tables already carry the name to report in
# column 5. They are recognised by track name rather than by their position in
# the directory listing, which depends on sort order and folder content.
has_symbol <- rg %in% c("Enhancer", "promotercore")

for (i in seq_along(dt)) {
  if (has_symbol[i]) {
    colnames(dt[[i]])[5] <- "Gene"
  } else {
    # Translate the identifier in V4 through id_file; identifiers without an
    # entry are kept unchanged.
    symbol <- id_file[dt[[i]]$V4, 2]
    unmapped <- is.na(symbol)
    symbol[unmapped] <- dt[[i]]$V4[unmapped]
    dt[[i]]$Gene <- symbol
  }
  dt[[i]]$Gene <- paste0(rg[i], "(", dt[[i]]$Gene, ")")
  # keep breakpoint chr (V2), breakpoint pos (V3) and the label
  dt[[i]] <- unique(dt[[i]][, c(2:3, 5)])
}

pos2_gn <- as.data.frame(do.call(rbind, dt))
pos2_gn$pair2_loc <- paste(pos2_gn$V2, pos2_gn$V3, sep = "_")
pos2_gn <- pos2_gn[, c("pair2_loc", "Gene")]
write.table(pos2_gn[order(pos2_gn$pair2_loc), ], "Pos2_Gene.xls", sep = "\t",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

# Shell step: run combine.awk over the (location, label) tables and turn the
# spaces in its output into tabs.
# NOTE(review): combine.awk is not shown here; the R code that reads these files
# splits column 2 on ";", so it presumably joins all labels of one location with
# ";" - confirm.
# The results are written into pos1/ and pos2/, the same folders the R chunk
# lists with dir() when it collects the intersect tables.
awk -f combine.awk Pos1_Gene.xls|sed 's/ /\t/g' >pos1/Pos1_gene_region.bed
awk -f combine.awk Pos2_Gene.xls|sed 's/ /\t/g' >pos2/Pos2_gene_region.bed

#annotation order:promoter-5UTR-3utr-exon-intron-IGR-Enhancer
# Start again from the raw SV calls (sample names without the "_WGS" suffix) ...
sv <- read.delim("SV.meerkat.Results_2_208.xls", sep = "\t", header = TRUE,
                 stringsAsFactors = FALSE)
sv$Sample <- sub("_WGS", "", sv$Sample)

# ... and load the combined per-location annotation of both breakpoints
# (V1 = location key, V2 = annotation string).
pos1 <- read.table("pos1/Pos1_gene_region.bed", sep = "\t", header = FALSE,
                   stringsAsFactors = FALSE)
pos2 <- read.table("pos2/Pos2_gene_region.bed", sep = "\t", header = FALSE,
                   stringsAsFactors = FALSE)

# Classify every pair-1 location by the first track, in priority order, that
# occurs in its annotation string, and record the names attached to that track.

# Pull the names recorded as "<tag>(name)" out of each ";"-separated annotation
# string; returns one string per input element with the names joined by ";".
# Works on zero-length input, so a track without any hit is simply a no-op.
extract_tagged <- function(annot, tag) {
  vapply(strsplit(annot, split = ";"), function(piece) {
    piece <- piece[grep(tag, piece)]
    paste(gsub("\\)", "", gsub(paste0(tag, "\\("), "", piece)), collapse = ";")
  }, character(1))
}

pos1$Region <- pos1$V2
pos1$Gene <- ""

# tag searched for in the annotation -> label stored in Region, in priority order
region_tags <- c(promotercore = "Promoter", "5utr" = "5utr", "3utr" = "3utr",
                 exon = "exon", intron = "intron", IGR = "IGR")
for (tag in names(region_tags)) {
  # Region still holds the raw annotation for unclassified rows only; rows that
  # already received a label no longer match any later tag.
  hit <- grep(tag, pos1$Region)
  pos1$Region[hit] <- region_tags[[tag]]
  pos1$Gene[hit] <- extract_tagged(pos1$V2[hit], tag)
}

# Enhancer hits are kept in their own column and do not change Region.
pos1$Enhancer <- ""
hit <- grep("Enhancer", pos1$V2)
pos1$Enhancer[hit] <- extract_tagged(pos1$V2[hit], "Enhancer")
rownames(pos1) <- pos1$V1

#pos2
# Classify every pair-2 location by the first track, in priority order, that
# occurs in its annotation string, and record the names attached to that track.

# Pull the names recorded as "<tag>(name)" out of each ";"-separated annotation
# string; returns one string per input element with the names joined by ";".
# Works on zero-length input, so a track without any hit is simply a no-op.
extract_tagged <- function(annot, tag) {
  vapply(strsplit(annot, split = ";"), function(piece) {
    piece <- piece[grep(tag, piece)]
    paste(gsub("\\)", "", gsub(paste0(tag, "\\("), "", piece)), collapse = ";")
  }, character(1))
}

pos2$Region <- pos2$V2
pos2$Gene <- ""

# tag searched for in the annotation -> label stored in Region, in priority order
region_tags <- c(promotercore = "Promoter", "5utr" = "5utr", "3utr" = "3utr",
                 exon = "exon", intron = "intron", IGR = "IGR")
for (tag in names(region_tags)) {
  # Region still holds the raw annotation for unclassified rows only; rows that
  # already received a label no longer match any later tag.
  hit <- grep(tag, pos2$Region)
  pos2$Region[hit] <- region_tags[[tag]]
  pos2$Gene[hit] <- extract_tagged(pos2$V2[hit], tag)
}

# Enhancer hits are kept in their own column and do not change Region.
pos2$Enhancer <- ""
hit <- grep("Enhancer", pos2$V2)
pos2$Enhancer[hit] <- extract_tagged(pos2$V2[hit], "Enhancer")
rownames(pos2) <- pos2$V1

# Attach the region / gene / enhancer annotation of both breakpoints to each SV.
# Location keys have the form "chr<Chr>_<Pos>", matching the row names of
# pos1 / pos2.
sv$loc1 <- paste0("chr", sv$Chr1, "_", sv$Pos1)
sv$loc2 <- paste0("chr", sv$Chr2, "_", sv$Pos2)

# Look the keys up with match(): indexing a data frame by character row names
# falls back to partial matching, so a location missing from the table could
# silently inherit the annotation of a longer key that starts with the same
# characters. Locations without an entry give NA.
row1 <- match(sv$loc1, rownames(pos1))
row2 <- match(sv$loc2, rownames(pos2))

sv$Region1 <- pos1[row1, "Region"]
sv$Gene1 <- pos1[row1, "Gene"]
sv$Enhancer1 <- pos1[row1, "Enhancer"]

sv$Region2 <- pos2[row2, "Region"]
sv$Gene2 <- pos2[row2, "Gene"]
sv$Enhancer2 <- pos2[row2, "Enhancer"]

write.table(sv, "SV_pair_gene.xls", sep = "\t", row.names = FALSE, quote = FALSE)

#IGR
# For every breakpoint classified as IGR, write a BED window of +/-100 kb around
# it (start clamped at 0) with the location key "<Chr>_<Pos>" as fourth column.
# The window is computed in integer arithmetic: integer columns are always
# written as plain digits, whereas doubles such as 200000 would be written by
# write.table() as "2e+05", which is not a valid BED coordinate. The vectorised
# pmax() also copes with the case that no breakpoint is intergenic.
gl <- which(sv$Region1 == "IGR")
igr1_100kb <- sv[gl, 3:4]
igr1_100kb$loc1 <- paste(igr1_100kb$Chr1, igr1_100kb$Pos1, sep = "_")
igr1_100kb$Chr1 <- paste0("chr", igr1_100kb$Chr1)
bp1 <- as.integer(igr1_100kb$Pos1)
igr1_100kb$start <- pmax(bp1 - 100000L, 0L)
igr1_100kb$end <- bp1 + 100000L
# columns written: chr, start, end, location key
write.table(unique(igr1_100kb[, c(1, 4, 5, 3)]), "IGR1_100kb.bed", sep = "\t",
            row.names = FALSE, quote = FALSE, col.names = FALSE)

gl <- which(sv$Region2 == "IGR")
igr2_100kb <- sv[gl, 5:6]
igr2_100kb$loc2 <- paste(igr2_100kb$Chr2, igr2_100kb$Pos2, sep = "_")
igr2_100kb$Chr2 <- paste0("chr", igr2_100kb$Chr2)
bp2 <- as.integer(igr2_100kb$Pos2)
igr2_100kb$start <- pmax(bp2 - 100000L, 0L)
igr2_100kb$end <- bp2 + 100000L
write.table(unique(igr2_100kb[, c(1, 4, 5, 3)]), "IGR2_100kb.bed", sep = "\t",
            row.names = FALSE, quote = FALSE, col.names = FALSE)

#annotation IGR around 100kb
# Intersect the +/-100 kb windows of the intergenic pair-1 breakpoints
# (IGR1_100kb.bed, 4 columns) with every annotation track. The track's own
# columns start at field 5 here, so each output row is "<f5>_<f6>_<f7>",
# the location key of the window (field 4) and the track's identifier field(s).
mkdir igr1_100kb

# GeneHancer: ';' and '=' become tabs so the attribute values are separate fields.
bedtools intersect -a IGR1_100kb.bed -b hg38_annotation/genehancer.v4.7.bed/data -wa -wb \
  | sed 's/;/\t/g' | sed 's/=/\t/g' \
  | awk '{print $5"_"$6"_"$7,$4,$11,$13}' \
  | sort | uniq | sed 's/ /\t/g' > igr1_100kb/IGR1_100kb.Enhancer

# Core promoters: two identifier fields are kept (field 9, then field 8).
bedtools intersect -a IGR1_100kb.bed -b hg38_annotation/gencode.v27.promoterCore.bed/data -wa -wb \
  | awk '{print $5"_"$6"_"$7,$4,$9,$8}' \
  | sort | uniq | sed 's/ /\t/g' > igr1_100kb/IGR1_100kb.promotercore

# 3'UTR, 5'UTR, exon and intron tracks share one layout (identifier in field 8).
for feat in 3utr 5utr exon intron; do
  bedtools intersect -a IGR1_100kb.bed -b hg38_annotation/gencode.v27.${feat}.bed/data -wa -wb \
    | awk '{print $5"_"$6"_"$7,$4,$8}' \
    | sort | uniq | sed 's/ /\t/g' > igr1_100kb/IGR1_100kb.${feat}
done

# Intergenic regions.
# NOTE(review): the first-pass annotation reads the IGR identifier from the 7th
# column of the track (field 10 with a 3-column input); with this 4-column input
# that column is field 11, but field 8 is printed here - confirm this is intended.
bedtools intersect -a IGR1_100kb.bed -b hg38_annotation/IGR2.1/data -wa -wb \
  | awk '{print $5"_"$6"_"$7,$4,$8}' \
  | sort | uniq | sed 's/ /\t/g' > igr1_100kb/IGR1_100kb.IGR

# Same intersections for the +/-100 kb windows of the intergenic pair-2
# breakpoints (IGR2_100kb.bed, 4 columns): each output row is "<f5>_<f6>_<f7>",
# the location key of the window (field 4) and the track's identifier field(s).
mkdir igr2_100kb

# GeneHancer: ';' and '=' become tabs so the attribute values are separate fields.
bedtools intersect -a IGR2_100kb.bed -b hg38_annotation/genehancer.v4.7.bed/data -wa -wb \
  | sed 's/;/\t/g' | sed 's/=/\t/g' \
  | awk '{print $5"_"$6"_"$7,$4,$11,$13}' \
  | sort | uniq | sed 's/ /\t/g' > igr2_100kb/IGR2_100kb.Enhancer

# Core promoters: two identifier fields are kept (field 9, then field 8).
bedtools intersect -a IGR2_100kb.bed -b hg38_annotation/gencode.v27.promoterCore.bed/data -wa -wb \
  | awk '{print $5"_"$6"_"$7,$4,$9,$8}' \
  | sort | uniq | sed 's/ /\t/g' > igr2_100kb/IGR2_100kb.promotercore

# 3'UTR, 5'UTR, exon and intron tracks share one layout (identifier in field 8).
for feat in 3utr 5utr exon intron; do
  bedtools intersect -a IGR2_100kb.bed -b hg38_annotation/gencode.v27.${feat}.bed/data -wa -wb \
    | awk '{print $5"_"$6"_"$7,$4,$8}' \
    | sort | uniq | sed 's/ /\t/g' > igr2_100kb/IGR2_100kb.${feat}
done

# Intergenic regions.
# NOTE(review): the first-pass annotation reads the IGR identifier from the 7th
# column of the track (field 10 with a 3-column input); with this 4-column input
# that column is field 11, but field 8 is printed here - confirm this is intended.
bedtools intersect -a IGR2_100kb.bed -b hg38_annotation/IGR2.1/data -wa -wb \
  | awk '{print $5"_"$6"_"$7,$4,$8}' \
  | sort | uniq | sed 's/ /\t/g' > igr2_100kb/IGR2_100kb.IGR

#IGR 1
# For every intergenic pair-1 breakpoint, keep the feature(s) inside its
# +/-100 kb window whose nearer edge is closest to the breakpoint.
file <- dir("igr1_100kb", pattern = "^IGR1_100kb\\.")
rg <- sub("IGR1_100kb.", "", file, fixed = TRUE)
dt <- lapply(file.path("igr1_100kb", file), read.table,
             sep = "\t", stringsAsFactors = FALSE)

# The Enhancer and promotercore tables already carry the name to report in
# column 4. They are recognised by track name rather than by their position in
# the directory listing, which depends on sort order and folder content.
has_symbol <- rg %in% c("Enhancer", "promotercore")

for (i in seq_along(dt)) {
  if (has_symbol[i]) {
    colnames(dt[[i]])[4] <- "Gene"
  } else {
    # Translate the identifier in V3 through id_file; identifiers without an
    # entry are kept unchanged.
    symbol <- id_file[dt[[i]]$V3, 2]
    unmapped <- is.na(symbol)
    symbol[unmapped] <- dt[[i]]$V3[unmapped]
    dt[[i]]$Gene <- symbol
  }
  dt[[i]]$Gene <- paste0(rg[i], "(", dt[[i]]$Gene, ")")
  # drop the raw identifier column; V1 (feature key), V2 (location) and Gene remain
  dt[[i]] <- unique(dt[[i]][, -3])
}

#the nearest gene
pos1_gn <- as.data.frame(do.call(rbind, dt))
# V1 is "<chr>_<start>_<end>" of the feature, V2 is "<chr>_<pos>" of the
# breakpoint. The numbers are taken from the end of each key, row by row, so a
# chromosome name that itself contains "_" cannot shift the fields of other rows.
feat_start <- as.numeric(sub("^.*_([0-9]+)_[0-9]+$", "\\1", pos1_gn$V1))
feat_end <- as.numeric(sub("^.*_", "", pos1_gn$V1))
bp <- as.numeric(sub("^.*_", "", pos1_gn$V2))

# Distance from the breakpoint to the nearer edge of the feature.
# NOTE(review): a feature that spans the breakpoint therefore gets the distance
# to its nearer edge, not 0 - confirm this is the intended definition.
pos1_gn$min.dist <- pmin(abs(feat_start - bp), abs(feat_end - bp))

# keep, per breakpoint, every feature that attains the minimum (ties included)
nearest <- pos1_gn$min.dist == ave(pos1_gn$min.dist, pos1_gn$V2, FUN = min)
pos1_gn <- pos1_gn[which(nearest), ]
pos1_gn <- pos1_gn[order(pos1_gn$V2), ]
write.table(pos1_gn, "IGR1_100kb_Gene.xls", sep = "\t", row.names = FALSE,
            quote = FALSE)
write.table(pos1_gn[, 2:3], "IGR1_Gene.xls", sep = "\t", row.names = FALSE,
            col.names = FALSE, quote = FALSE)

#IGR 2
# For every intergenic pair-2 breakpoint, keep the feature(s) inside its
# +/-100 kb window whose nearer edge is closest to the breakpoint.
file <- dir("igr2_100kb", pattern = "^IGR2_100kb\\.")
rg <- sub("IGR2_100kb.", "", file, fixed = TRUE)
dt <- lapply(file.path("igr2_100kb", file), read.table,
             sep = "\t", stringsAsFactors = FALSE)

# The Enhancer and promotercore tables already carry the name to report in
# column 4. They are recognised by track name rather than by their position in
# the directory listing, which depends on sort order and folder content.
has_symbol <- rg %in% c("Enhancer", "promotercore")

for (i in seq_along(dt)) {
  if (has_symbol[i]) {
    colnames(dt[[i]])[4] <- "Gene"
  } else {
    # Translate the identifier in V3 through id_file; identifiers without an
    # entry are kept unchanged.
    symbol <- id_file[dt[[i]]$V3, 2]
    unmapped <- is.na(symbol)
    symbol[unmapped] <- dt[[i]]$V3[unmapped]
    dt[[i]]$Gene <- symbol
  }
  dt[[i]]$Gene <- paste0(rg[i], "(", dt[[i]]$Gene, ")")
  # drop the raw identifier column; V1 (feature key), V2 (location) and Gene remain
  dt[[i]] <- unique(dt[[i]][, -3])
}

pos2_gn <- as.data.frame(do.call(rbind, dt))
# V1 is "<chr>_<start>_<end>" of the feature, V2 is "<chr>_<pos>" of the
# breakpoint. The numbers are taken from the end of each key, row by row, so a
# chromosome name that itself contains "_" cannot shift the fields of other rows.
feat_start <- as.numeric(sub("^.*_([0-9]+)_[0-9]+$", "\\1", pos2_gn$V1))
feat_end <- as.numeric(sub("^.*_", "", pos2_gn$V1))
bp <- as.numeric(sub("^.*_", "", pos2_gn$V2))

# Distance from the breakpoint to the nearer edge of the feature.
# NOTE(review): a feature that spans the breakpoint therefore gets the distance
# to its nearer edge, not 0 - confirm this is the intended definition.
pos2_gn$min.dist <- pmin(abs(feat_start - bp), abs(feat_end - bp))

# keep, per breakpoint, every feature that attains the minimum (ties included)
nearest <- pos2_gn$min.dist == ave(pos2_gn$min.dist, pos2_gn$V2, FUN = min)
pos2_gn <- pos2_gn[which(nearest), ]
pos2_gn <- pos2_gn[order(pos2_gn$V2), ]
write.table(pos2_gn, "IGR2_100kb_Gene.xls", sep = "\t", row.names = FALSE,
            quote = FALSE)
write.table(pos2_gn[, 2:3], "IGR2_Gene.xls", sep = "\t", row.names = FALSE,
            col.names = FALSE, quote = FALSE)

# Shell step: run combine.awk over the (location, nearest-feature label) tables
# and turn the spaces in its output into tabs.
# NOTE(review): combine.awk is not shown here; the R code that reads these files
# splits column 2 on ";", so it presumably joins all labels of one location with
# ";" - confirm.
awk -f combine.awk IGR1_Gene.xls|sed 's/ /\t/g' >IGR1_gene_region.bed
awk -f combine.awk IGR2_Gene.xls|sed 's/ /\t/g' >IGR2_gene_region.bed

#add IGR annotation
sv <- read.delim("SV_pair_gene.xls", sep = "\t", header = TRUE,
                 stringsAsFactors = FALSE)

# Harmonise the SV type labels.
# NOTE(review): "intra_chr_translocatioin" is spelled exactly as in the original
# script; confirm it matches the spelling used in the input table.
sv$SV_type[sv$SV_type %in% c("intra_chr_translocatioin", "inversion")] <- "Inversion"
sv$SV_type[grepl("deletion", sv$SV_type)] <- "Deletion"
sv$SV_type[sv$SV_type %in% "inter_chr_translocation"] <- "Translocation:inter-chromosomal"
sv$SV_type[sv$SV_type %in% "insertion"] <- "Insertion"

# Nearest-feature annotation of the intergenic breakpoints
# (V1 = location key, V2 = annotation string); fill pads short lines.
pos1 <- read.table("IGR1_gene_region.bed", sep = "\t", stringsAsFactors = FALSE,
                   fill = TRUE)
pos2 <- read.table("IGR2_gene_region.bed", sep = "\t", stringsAsFactors = FALSE,
                   fill = TRUE)

# Classify the nearest feature of every intergenic pair-1 breakpoint. Tracks are
# tried in priority order; an Enhancer hit, checked last on the raw annotation,
# overrides both Region and Gene.

# Pull the names recorded as "<tag>(name)" out of each ";"-separated annotation
# string; returns one string per input element with the names joined by ";".
# Works on zero-length input, so a track without any hit is simply a no-op.
extract_tagged <- function(annot, tag) {
  vapply(strsplit(annot, split = ";"), function(piece) {
    piece <- piece[grep(tag, piece)]
    paste(gsub("\\)", "", gsub(paste0(tag, "\\("), "", piece)), collapse = ";")
  }, character(1))
}

pos1$Region <- pos1$V2
pos1$Gene <- ""
# The join further down reads pos1[, "Enhancer"]; without this column that
# lookup stops with "undefined columns selected". It stays empty, exactly like
# the corresponding pos2 column.
pos1$Enhancer <- ""

# tag searched for in the annotation -> label stored in Region, in priority order
region_tags <- c(promotercore = "Promoter", "5utr" = "5utr", "3utr" = "3utr",
                 exon = "exon", intron = "intron", IGR = "IGR")
for (tag in names(region_tags)) {
  # Region still holds the raw annotation for unclassified rows only; rows that
  # already received a label no longer match any later tag.
  hit <- grep(tag, pos1$Region)
  pos1$Region[hit] <- region_tags[[tag]]
  pos1$Gene[hit] <- extract_tagged(pos1$V2[hit], tag)
}

hit <- grep("Enhancer", pos1$V2)
pos1$Region[hit] <- "Enhancer"
pos1$Gene[hit] <- extract_tagged(pos1$V2[hit], "Enhancer")
rownames(pos1) <- pos1$V1

#pos2
# Classify the nearest feature of every intergenic pair-2 breakpoint. Tracks are
# tried in priority order; an Enhancer hit, checked last on the raw annotation,
# overrides both Region and Gene.

# Pull the names recorded as "<tag>(name)" out of each ";"-separated annotation
# string; returns one string per input element with the names joined by ";".
# Works on zero-length input, so a track without any hit is simply a no-op.
extract_tagged <- function(annot, tag) {
  vapply(strsplit(annot, split = ";"), function(piece) {
    piece <- piece[grep(tag, piece)]
    paste(gsub("\\)", "", gsub(paste0(tag, "\\("), "", piece)), collapse = ";")
  }, character(1))
}

pos2$Region <- pos2$V2
pos2$Gene <- ""

# tag searched for in the annotation -> label stored in Region, in priority order
region_tags <- c(promotercore = "Promoter", "5utr" = "5utr", "3utr" = "3utr",
                 exon = "exon", intron = "intron", IGR = "IGR")
for (tag in names(region_tags)) {
  # Region still holds the raw annotation for unclassified rows only; rows that
  # already received a label no longer match any later tag.
  hit <- grep(tag, pos2$Region)
  pos2$Region[hit] <- region_tags[[tag]]
  pos2$Gene[hit] <- extract_tagged(pos2$V2[hit], tag)
}

# The Enhancer column is created empty and is not filled in this pass.
pos2$Enhancer <- ""
hit <- grep("Enhancer", pos2$V2)
pos2$Region[hit] <- "Enhancer"
pos2$Gene[hit] <- extract_tagged(pos2$V2[hit], "Enhancer")
rownames(pos2) <- pos2$V1

# Attach the nearest-feature annotation to the SV table. The IGR tables are keyed
# by "<Chr>_<Pos>" without the "chr" prefix, hence the sub().
# Keys are looked up with match(): these tables only contain the intergenic
# breakpoints, and indexing a data frame by character row names falls back to
# partial matching, so a location that is absent could silently pick up the
# annotation of a longer key starting with the same characters. Absent
# locations give NA.
row1 <- match(sub("chr", "", sv$loc1), rownames(pos1))
row2 <- match(sub("chr", "", sv$loc2), rownames(pos2))

sv$IGR_Region1 <- pos1[row1, "Region"]
sv$IGR_Gene1 <- pos1[row1, "Gene"]
sv$IGR_Enhancer1 <- pos1[row1, "Enhancer"]

sv$IGR_Region2 <- pos2[row2, "Region"]
sv$IGR_Gene2 <- pos2[row2, "Gene"]
sv$IGR_Enhancer2 <- pos2[row2, "Enhancer"]

#tier annotation
# Tier rules as implemented below ("nearest" = nearest feature found in the
# +/-100 kb window of an intergenic breakpoint):
#   tier1    neither breakpoint intergenic, same gene
#   tier2    neither breakpoint intergenic, different genes
#   tier3.x  exactly one breakpoint intergenic; its nearest feature is
#            .1 a gene feature with the same gene as the other breakpoint
#            .2 a gene feature with a different gene
#            .3 an enhancer
#            .4 intergenic again
#   tier4.x  both breakpoints intergenic;
#            .1 both nearest features genic, same gene
#            .2 both nearest features genic, different genes
#            .3 both nearest features enhancers
#            .4 exactly one nearest feature is an enhancer
#            .5 exactly one nearest feature is intergenic (assigned after .4,
#               so it wins when the other one is an enhancer)
#   tier5    both breakpoints intergenic and both nearest features intergenic
# Comparisons involving NA never select a row (which() drops them).
sv$tier <- ""

bp1_igr <- sv$Region1 == "IGR"
bp2_igr <- sv$Region2 == "IGR"
neither_igr <- !bp1_igr & !bp2_igr
only1_igr <- bp1_igr & !bp2_igr
only2_igr <- !bp1_igr & bp2_igr
both_igr <- bp1_igr & bp2_igr

near1_enh <- sv$IGR_Region1 == "Enhancer"
near2_enh <- sv$IGR_Region2 == "Enhancer"
near1_igr <- sv$IGR_Region1 == "IGR"
near2_igr <- sv$IGR_Region2 == "IGR"
near1_genic <- !near1_enh & !near1_igr
near2_genic <- !near2_enh & !near2_igr

sv$tier[which(neither_igr & sv$Gene1 == sv$Gene2)] <- "tier1"
sv$tier[which(neither_igr & sv$Gene1 != sv$Gene2)] <- "tier2"

sv$tier[which((only2_igr & near2_genic & sv$Gene1 == sv$IGR_Gene2) |
                (only1_igr & near1_genic & sv$IGR_Gene1 == sv$Gene2))] <- "tier3.1"
# The second alternative used to test IGR_Gene1 != "Enhancer"; the feature type
# lives in IGR_Region1, as in every other rule.
sv$tier[which((only2_igr & near2_genic & sv$Gene1 != sv$IGR_Gene2) |
                (only1_igr & near1_genic & sv$IGR_Gene1 != sv$Gene2))] <- "tier3.2"
sv$tier[which((only2_igr & near2_enh) | (only1_igr & near1_enh))] <- "tier3.3"
sv$tier[which((only2_igr & near2_igr) | (only1_igr & near1_igr))] <- "tier3.4"

both_genic <- both_igr & near1_genic & near2_genic
sv$tier[which(both_genic & sv$IGR_Gene1 == sv$IGR_Gene2)] <- "tier4.1"
sv$tier[which(both_genic & sv$IGR_Gene1 != sv$IGR_Gene2)] <- "tier4.2"
sv$tier[which(both_igr & near1_enh & near2_enh)] <- "tier4.3"
sv$tier[which(both_igr & ((near1_enh & !near2_enh) |
                            (!near1_enh & near2_enh)))] <- "tier4.4"
sv$tier[which(both_igr & ((near1_igr & !near2_igr) |
                            (!near1_igr & near2_igr)))] <- "tier4.5"
sv$tier[which(both_igr & near1_igr & near2_igr)] <- "tier5"

# Blank out missing values, then drop the name reported for nearest features
# that are themselves intergenic.
sv[is.na(sv)] <- ""
sv$IGR_Gene1[sv$IGR_Region1 == "IGR"] <- ""
sv$IGR_Gene2[sv$IGR_Region2 == "IGR"] <- ""

#Ensure the final pair gene
# Choose the gene reported for each breakpoint from the tier:
#   tier1 / tier2      -> the gene hit directly (Gene1 / Gene2)
#   tier3.1 - tier3.4  -> the nearest-feature gene for an intergenic breakpoint,
#                         otherwise the gene hit directly
#   tier4.1 - tier5    -> the nearest-feature gene (IGR_Gene1 / IGR_Gene2)
# Rows without a tier keep an empty string.
direct_tiers <- c("tier1", "tier2")
mixed_tiers <- c("tier3.1", "tier3.2", "tier3.3", "tier3.4")
igr_tiers <- c("tier4.1", "tier4.2", "tier4.3", "tier4.4", "tier4.5", "tier5")

is_direct <- sv$tier %in% direct_tiers
is_mixed <- sv$tier %in% mixed_tiers
is_igr <- sv$tier %in% igr_tiers

take_direct1 <- is_direct | (is_mixed & sv$Region1 != "IGR")
take_nearest1 <- is_igr | (is_mixed & sv$Region1 == "IGR")
sv$Pair1_Gene <- ""
sv$Pair1_Gene[take_direct1] <- sv$Gene1[take_direct1]
sv$Pair1_Gene[take_nearest1] <- sv$IGR_Gene1[take_nearest1]

take_direct2 <- is_direct | (is_mixed & sv$Region2 != "IGR")
take_nearest2 <- is_igr | (is_mixed & sv$Region2 == "IGR")
sv$Pair2_Gene <- ""
sv$Pair2_Gene[take_direct2] <- sv$Gene2[take_direct2]
sv$Pair2_Gene[take_nearest2] <- sv$IGR_Gene2[take_nearest2]

# Move the tier column behind the two pair-gene columns and write the table.
# The column is moved by name, so the result does not depend on the exact number
# of columns in the input SV table (with the 26-column layout this is the same
# order as c(1:23, 25:26, 24)).
sv <- sv[, c(setdiff(names(sv), "tier"), "tier")]
write.table(sv, "SV_pair_gene_tier.xls", sep = "\t", row.names = FALSE,
            quote = FALSE)

#add functional annotation
# OncoKB-style gene list, keyed by the first column of the file.
func <- read.table("CancerGenesList.txt", sep = "\t", header = TRUE,
                   stringsAsFactors = FALSE, row.names = 1)
func$OncoKB.OG[func$OncoKB.OG == ""] <- NA
func$OncoKB.TSG[func$OncoKB.TSG == ""] <- NA
# Join the non-missing OG / TSG entries of each gene with ","; seq_len() keeps
# this well defined even when the table has no rows.
func$OG_TSG <- vapply(seq_len(nrow(func)), function(i) {
  paste(na.omit(c(func$OncoKB.OG[i], func$OncoKB.TSG[i])), collapse = ",")
}, character(1))

# CGC / SMG membership table, keyed by the first column of the file.
cgc_smg <- read.table("CGC_SMG_gene.xls", sep = "\t", header = TRUE,
                      row.names = 1, stringsAsFactors = FALSE)

# Cancer Gene Census export, keyed by gene symbol.
ccg <- as.data.frame(readr::read_csv("Census_allTue Jan  2 12_17_09 2018.csv"))
cgc_gene <- ccg$`Gene Symbol`
rownames(ccg) <- ccg$`Gene Symbol`

# Curated pathway table: first three columns only, first column renamed to
# "Gene" and used as the row key.
pathw <- as.data.frame(
  readxl::read_excel("Pathway+gene+oncogene+TSG+curated2.xlsx")[, 1:3]
)
colnames(pathw)[1] <- "Gene"
rownames(pathw) <- pathw$Gene

# Functional annotation of the pair-1 gene. Every column starts empty and is
# filled only for genes that are present in the corresponding table.
in_func <- sv$Pair1_Gene %in% rownames(func)
sv$Pair1_OncoKB.Annotated <- ""
sv[in_func, "Pair1_OncoKB.Annotated"] <- func[sv$Pair1_Gene[in_func], 2]
sv$Pair1_OncoKB_OG_TSG <- ""
sv[in_func, "Pair1_OncoKB_OG_TSG"] <- func[sv$Pair1_Gene[in_func], "OG_TSG"]

# The two columns of cgc_smg are copied over together; the second one is then
# split into a copy without the "cgc" token (SMG) and a copy without the "smg"
# token (CGC).
in_smg <- sv$Pair1_Gene %in% rownames(cgc_smg)
sv$Pair1_SMG_source <- ""
sv$Pair1_SMG <- ""
sv[in_smg, c("Pair1_SMG_source", "Pair1_SMG")] <- cgc_smg[sv$Pair1_Gene[in_smg], ]
sv$Pair1_CGC <- sv$Pair1_SMG

sv$Pair1_SMG <- sub("cgc", "", sub("cgc;", "", sv$Pair1_SMG))
sv$Pair1_CGC <- sub("smg", "", sub(";smg", "", sv$Pair1_CGC))

#CGC
in_ccg <- sv$Pair1_Gene %in% rownames(ccg)
sv$Pair1_CGC_OG_TSG <- ""
sv[in_ccg, "Pair1_CGC_OG_TSG"] <- ccg[sv$Pair1_Gene[in_ccg], "Role in Cancer"]

# Pathway table: its column 2 goes to Pair1_OG_TSG, its column 3 to
# Pair1_pathwaylist.
sv$Pair1_pathwaylist <- ""
sv$Pair1_OG_TSG <- ""
pthl <- which(sv$Pair1_Gene %in% pathw$Gene)
sv[pthl, c("Pair1_OG_TSG", "Pair1_pathwaylist")] <- pathw[sv$Pair1_Gene[pthl], 2:3]

#site2
# Functional annotation of the pair-2 gene. Every column starts empty and is
# filled only for genes that are present in the corresponding table.
in_func <- sv$Pair2_Gene %in% rownames(func)
sv$Pair2_OncoKB.Annotated <- ""
sv[in_func, "Pair2_OncoKB.Annotated"] <- func[sv$Pair2_Gene[in_func], 2]
sv$Pair2_OncoKB_OG_TSG <- ""
sv[in_func, "Pair2_OncoKB_OG_TSG"] <- func[sv$Pair2_Gene[in_func], "OG_TSG"]

# The two columns of cgc_smg are copied over together; the second one is then
# split into a copy without the "cgc" token (SMG) and a copy without the "smg"
# token (CGC).
in_smg <- sv$Pair2_Gene %in% rownames(cgc_smg)
sv$Pair2_SMG_source <- ""
sv$Pair2_SMG <- ""
sv[in_smg, c("Pair2_SMG_source", "Pair2_SMG")] <- cgc_smg[sv$Pair2_Gene[in_smg], ]
sv$Pair2_CGC <- sv$Pair2_SMG

sv$Pair2_SMG <- sub("cgc", "", sub("cgc;", "", sv$Pair2_SMG))
sv$Pair2_CGC <- sub("smg", "", sub(";smg", "", sv$Pair2_CGC))

#CGC
in_ccg <- sv$Pair2_Gene %in% rownames(ccg)
sv$Pair2_CGC_OG_TSG <- ""
sv[in_ccg, "Pair2_CGC_OG_TSG"] <- ccg[sv$Pair2_Gene[in_ccg], "Role in Cancer"]

# Pathway table: its column 2 goes to Pair2_OG_TSG, its column 3 to
# Pair2_pathwaylist.
sv$Pair2_pathwaylist <- ""
sv$Pair2_OG_TSG <- ""
pthl <- which(sv$Pair2_Gene %in% pathw$Gene)
sv[pthl, c("Pair2_OG_TSG", "Pair2_pathwaylist")] <- pathw[sv$Pair2_Gene[pthl], 2:3]
write.table(sv, "SV_pair_gene_tier_annotation.xls", sep = "\t",
            row.names = FALSE, quote = FALSE)