用R语言进行GO与KEGG可视化分析
参考了CSDN这位大佬的代码:
原文链接:https://blog.csdn.net/weixin_54004950/article/details/128498456
我视频中自己使用的代码放在下面:
# Install the required R packages.
# NOTE: these calls are unconditional, so every run of the script installs
# the packages again; they can be skipped once the packages are present.
install.packages("ggplot2")
install.packages("openxlsx")
install.packages("tidyverse")
# Attach the required packages
library (ggplot2) # plotting
library(tidyverse) # column-splitting function (separate) used further down
library(openxlsx) # reading Excel workbooks
# Working directory ----
getwd() # report the current working directory
setwd("C:/Users/LD/Desktop/GO&KEGG") # switch to the project folder (adjust to your own machine)

# Data import ----
# The sheets BP, CC and MF of the same workbook are loaded into three
# separate data frames.
BP <- read.xlsx("C:/Users/LD/Desktop/GO&KEGG/GO&KEGG.xlsx", sheet = "BP")
CC <- read.xlsx("C:/Users/LD/Desktop/GO&KEGG/GO&KEGG.xlsx", sheet = "CC")
MF <- read.xlsx("C:/Users/LD/Desktop/GO&KEGG/GO&KEGG.xlsx", sheet = "MF")

# Report the number of rows and columns of each table.
dim(BP)
dim(CC)
dim(MF)
# Split the Term column of each table at "~" into two columns: the part
# before the separator becomes ID, the part after it stays in Term.
BP <- separate(data = BP, col = Term, into = c("ID", "Term"), sep = "~")
CC <- separate(data = CC, col = Term, into = c("ID", "Term"), sep = "~")
MF <- separate(data = MF, col = Term, into = c("ID", "Term"), sep = "~")
# Assemble the three GO tables (BP, CC, MF) into one data frame that feeds
# the bar charts. Rows keep the BP -> CC -> MF order of concatenation.
Go_data <- data.frame(
  ID = c(BP$ID, CC$ID, MF$ID), # identifier part of the split Term column
  Term = c(BP$Term, CC$Term, MF$Term), # description part of the split Term column
  GeneNumber = c(BP$Count, CC$Count, MF$Count), # Count column of each sheet
  Type = c(BP$Type, CC$Type, MF$Type) # Type column of each sheet
)

# Ordered factor that pins the axis order of the terms to the row order of
# Go_data instead of ggplot2's default alphabetical ordering.
# unique() is required because factor() raises an error when `levels`
# contains duplicates, which happens as soon as one term name appears in
# more than one sheet.
Go_data$Type_order <- factor(
  Go_data$Term,
  levels = unique(Go_data$Term),
  ordered = TRUE
)

# Open the data viewer only in interactive sessions; a batch run (Rscript)
# may have no display for View() to use.
if (interactive()) {
  View(Go_data)
}
# GO bar chart with flipped coordinates: one bar per term (in Type_order
# order), bar length = GeneNumber, fill colour chosen by Type.
ggplot(
  data = Go_data,
  mapping = aes(x = Type_order, y = GeneNumber, fill = Type)
) +
  geom_bar(stat = "identity", width = 0.9) + # bar heights taken directly from GeneNumber
  scale_fill_manual(values = c("blue", "green", "red")) + # fill colours for the Type levels
  coord_flip() + # swap the axes so the terms run along the vertical axis
  labs(title = "GO Enrich", x = "GO Term", y = "Gene_Number") + # title and axis labels
  theme_bw() # black-and-white theme
# GO bubble plot ----
# Stack the three GO tables into a single data frame.
ago <- rbind(BP, CC, MF)
ago <- as.data.frame(ago)
# seq_len() instead of 1:nrow() so an empty table yields an empty sequence
# rather than c(1, 0).
rownames(ago) <- seq_len(nrow(ago))

# Factor whose values equal ago$Term row by row but whose levels are in
# reversed row order, so the first row is drawn at the top of the y axis.
ago$order <- factor(rev(seq_len(nrow(ago))), labels = rev(ago$Term))

# One point per term: x = Gene.ratio, point size = Count, colour = PValue
# (red for low values, blue for high values).
ggplot(ago, aes(y = order, x = Gene.ratio)) +
  geom_point(aes(size = Count, color = PValue)) +
  scale_color_gradient(low = "red", high = "blue") +
  # The legend titles are passed as separate arguments. The original nested
  # `size = "Count"` inside expression(), which never reached labs().
  labs(
    color = "PValue", size = "Count",
    x = "Gene Ratio", y = "GO term", title = "GO Enrichment"
  ) +
  theme_bw()
# GO bar chart without coordinate flip: terms along the x axis (in
# Type_order order), bar height = GeneNumber, fill colour chosen by Type.
ggplot(
  data = Go_data,
  mapping = aes(x = Type_order, y = GeneNumber, fill = Type)
) +
  geom_bar(stat = "identity", width = 0.8) + # bar heights taken directly from GeneNumber
  scale_fill_manual(values = c("blue", "green", "red")) + # fill colours for the Type levels
  labs(title = "GO Terms Enrich", x = "GO term", y = "Gene_Number") + # title and axis labels
  theme_bw()
# KEGG visualisation ----
# Load the KEGG sheet of the workbook.
KEGG_data <- read.xlsx(
  "C:/Users/LD/Desktop/GO&KEGG/GO&KEGG.xlsx",
  sheet = "KEGG"
)

# Bar chart: one bar per pathway (Term), bar length = Count, fill colour
# mapped to PValue (red for low values, blue for high values).
ggplot(KEGG_data, aes(y = Term, x = Count, fill = PValue)) +
  geom_bar(stat = "identity", width = 0.8) + # bar width
  scale_fill_gradient(low = "red", high = "blue") +
  labs(
    title = "KEGG Pathways Enrichment", # plot title and axis names
    x = "Gene number",
    y = "Pathway"
  ) +
  # theme_bw() is a complete theme and replaces every theme element, so it
  # has to come BEFORE the theme() tweaks; in the opposite order the bold,
  # size-16 titles below would be discarded.
  theme_bw() +
  theme(
    axis.title.x = element_text(face = "bold", size = 16),
    axis.title.y = element_text(face = "bold", size = 16),
    legend.title = element_text(face = "bold", size = 16)
  )
# KEGG bubble plot ----
# One point per pathway (Term): x = Gene.ratio, point size = Count,
# colour = PValue (red for low values, blue for high values).
ggplot(KEGG_data, aes(y = Term, x = Gene.ratio)) +
  geom_point(aes(size = Count, color = PValue)) +
  scale_color_gradient(low = "red", high = "blue") +
  # The legend titles are passed as separate arguments. The original nested
  # `size = "Count"` inside expression(), which never reached labs().
  labs(
    color = "PValue", size = "Count",
    x = "Gene Ratio", y = "Pathways", title = "KEGG Pathway Enrichment"
  ) +
  theme_bw()
视频中使用的Excel表格文件与R script分享至:https://www.jianguoyun.com/p/DaKIerIQ18DmCxj06JMFIAA
https://www.jianguoyun.com/p/DTpMJUgQ18DmCxj46JMFIAA