# [合集]R语言绘图(ggplot2,ggpubr)从入门到精通

# [合集]R语言绘图(ggplot2,ggpubr)从入门到精通
# * 第一章:快速探索数据 ------------------------------------------------------------
###################
## Scatter plots
# Base graphics: plot() takes the x and y vectors directly.
plot(x = mtcars$wt, y = mtcars$mpg)

# qplot(), ggplot2's quick-plot helper: first with bare vectors, then with
# column names looked up in a data frame.
library(ggplot2)
qplot(x = mtcars$wt, y = mtcars$mpg)
qplot(x = wt, y = mpg, data = mtcars)

# The same chart built with the full ggplot() grammar.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
###################
## Line charts and curves
# Base graphics can draw these too: the `type` argument of plot() selects the
# kind of plot, and type = "l" draws a line.
plot(x = pressure$temperature, y = pressure$pressure, type = "l")
points(x = pressure$temperature, y = pressure$pressure)
# Overlay a second series (half the pressure) in red, with its points.
lines(x = pressure$temperature, y = pressure$pressure / 2, col = "red")
points(x = pressure$temperature, y = pressure$pressure / 2, col = "red")

# qplot()
library(ggplot2)
qplot(x = pressure$temperature, y = pressure$pressure, geom = "line")
# When x and y are columns of the same data frame, pass it through `data`.
qplot(x = temperature, y = pressure, data = pressure, geom = "line")

# ggplot(): a line layer with a point layer on top.
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point()
###################
## Bar charts
# Base graphics: barplot(). Copy the built-in data sets into the workspace
# first.
BOD <- BOD
mtcars <- mtcars
barplot(height = BOD$demand, names.arg = BOD$Time)

# Sometimes "bar chart" means a chart whose bars show the number of cases in
# each category. That is like a histogram with a discrete x axis; the counts
# per category come from table().
barplot(height = mtcars$cyl)        # raw values, one bar per row
barplot(height = table(mtcars$cyl)) # one bar per category, height = count

# qplot()
library(ggplot2)
qplot(x = mtcars$cyl)         # cyl treated as a continuous variable
qplot(x = factor(mtcars$cyl)) # cyl treated as a categorical variable

# ggplot()
ggplot(data = mtcars, mapping = aes(cyl)) +
  geom_bar()
# stat = "count" (the default): bar height is the number of rows at each x.
# stat = "identity": bar height is the y value supplied for each x.
# In short, "identity" uses the y column and "count" uses frequencies.
ggplot(data = BOD, mapping = aes(Time, demand)) +
  geom_bar(stat = "identity")
ggplot(data = BOD, mapping = aes(factor(Time), demand)) +
  geom_bar(stat = "identity")
###################
## Histograms
# x is a continuous variable.
# Base graphics: hist(); `breaks` is passed to request a bin count.
hist(mtcars$mpg)
hist(mtcars$mpg, breaks = 10)
# qplot(): `binwidth` sets the width of each bin.
qplot(mpg, data = mtcars, binwidth = 1)
# The equivalent ggplot() call. binwidth matches the qplot() call above so the
# two really produce the same histogram.
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram(binwidth = 1)
###################
## Box plots
# plot() draws a box plot by default when x is a factor paired with a numeric
# y.
ToothGrowth <- ToothGrowth
plot(x = ToothGrowth$supp, y = ToothGrowth$len)
# Formula interface.
boxplot(formula = len ~ supp, data = ToothGrowth)
# Putting two variables on the right-hand side gives a grouped box plot.
boxplot(formula = len ~ supp + dose, data = ToothGrowth)

# qplot() box plot.
qplot(x = supp, y = len, data = ToothGrowth, geom = "boxplot")
# Box plot over the interaction of two columns of the data frame.
qplot(
  x = interaction(supp, dose), y = len,
  data = ToothGrowth, geom = "boxplot"
)
# Equivalent ggplot() call.
ggplot(
  data = ToothGrowth,
  mapping = aes(x = interaction(supp, dose), y = len)
) +
  geom_boxplot()
###################
## Plotting functions
# curve() takes an expression written in terms of the variable x.
curve(x^3 - 5 * x, from = -4, to = 4)

# A user-defined function to plot: 1 / (1 + exp(-xvar + 10)).
# xvar: numeric vector; the result has the same length.
my_fun <- function(xvar) {
  1 / (1 + exp(-xvar + 10))
}
curve(my_fun(x), from = 0, to = 20)
# add = TRUE draws on top of the existing plot. TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned.
curve(1 - my_fun(x), add = TRUE, col = "red")
# The ggplot2 equivalent: stat_function() draws `fun` as a line layer; the
# data frame only supplies the two ends of the x range.
ggplot(data = data.frame(x = c(0, 20)), mapping = aes(x = x)) +
  stat_function(fun = my_fun, geom = "line")

# Second function, 1 - 1 / (1 + exp(-xvar + 10)), for the red overlay.
my_fun2 <- function(xvar) {
  1 - 1 / (1 + exp(-xvar + 10))
}

# Both functions on the same plot.
ggplot(data = data.frame(x = c(0, 20)), mapping = aes(x = x)) +
  stat_function(fun = my_fun, geom = "line") +
  stat_function(fun = my_fun2, geom = "line", color = "red")
# * 第二章:柱状图深入研究 ------------------------------------------------------------
library(gcookbook)
pg_mean <- pg_mean
# Set the bar fill and the outline (color) to fixed values.
ggplot(pg_mean, aes(group, weight)) +
  geom_bar(stat = "identity", fill = "lightblue", color = "black")
# Colors can also be driven by a grouping variable mapped to `fill`.
# y = Weight together with stat = "identity" plots the measured weights;
# without them geom_bar() would count rows instead.
cabbage_exp <- cabbage_exp
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")
# Aside: the `position` argument fine-tunes where geoms are drawn. It matters
# most for grouped bar charts, where the bars of a group can be stacked or
# placed side by side.
# - Common values (stack and dodge are used most often):
#   - "identity": no adjustment, bars of a group overlap each other;
#   - "stack": stacked, the default;
#   - "fill": stacked and scaled to proportions;
#   - "dodge": placed side by side.
# - ColorBrewer palettes are applied with scale_colour_brewer() and
#   scale_fill_brewer(). RColorBrewer::display.brewer.all() shows every
#   available palette.
# Change the color scheme with scale_fill_brewer():
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_brewer(palette = "Pastel1")
# Display the available palettes.
RColorBrewer::display.brewer.all()
###################
## Grouped bar charts
library(gcookbook)
cabbage_exp <- cabbage_exp
# Side-by-side bars, one per Cultivar within each Date.
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", color = "black")
# Without `position` the default "stack" is used.
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")
# Besides fill, the grouping variable can be mapped to color (or linetype).
ggplot(data = cabbage_exp, mapping = aes(Date, Weight, color = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge")

# Note: if a combination of the categorical variables is missing, that bar is
# absent and the neighbouring bar expands to fill the space.
ce <- cabbage_exp[1:5, ]
ggplot(data = ce, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  scale_fill_brewer(palette = "Pastel1")

# When a category really has no y value, NA or 0 can stand in for it; here
# the sixth Weight is set to 0.
ce_NA <- cabbage_exp
ce_NA$Weight[6] <- 0
ggplot(data = ce_NA, mapping = aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", color = "black") +
  scale_fill_brewer(palette = "Pastel1")
###################
## Tips for choosing colors
library(gcookbook)
# Keep the rows of uspopchange whose rank of Change is above 40.
upc <- subset(uspopchange, rank(Change) > 40)
upc
ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(stat = "identity")
# This example also uses reorder() to sort the bars by height. The sort key is
# the `Change` column; R names are case-sensitive, so a lower-case `change`
# would not be found.
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")
# Aside: reorder()
# reorder(x, X, FUN = mean, ..., order = is.ordered(x), decreasing = FALSE)
# x: the data to reorder; the resulting order is applied to x
# X: the values to order by
# Aside: scale_fill_manual()
# Custom colors: supply as many values as the grouping variable has groups.
# scale_fill_manual(..., values, aesthetics = "fill", breaks = waiver(), na.value = "grey50")
###################
## Different colors for positive and negative bars
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# Idea: fill by sign. To make the sign available to ggplot, add a logical
# column that is TRUE for non-negative values and map `fill` to it.
csub$pos <- csub$Anomaly10y >= 0
csub
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")
# scale_fill_manual() sets the two colors; guide = "none" removes the legend
# (the older guide = FALSE spelling is deprecated in ggplot2).
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity", color = "black", size = 0.25) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")
###################
## Bar width and spacing (the `width` argument)
library(gcookbook)
# Default width (0.9 according to this tutorial).
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")
# Narrower bars.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)
# The tutorial treats 1 as the largest usable width.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 1)

## Spacing between the bars of a grouped bar chart
# By default the bars inside one group have no gap between them.
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5)
# A dodge width larger than the bar width opens a gap ...
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0.7), width = 0.5)
# ... a smaller one makes the bars overlap ...
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0.3), width = 0.5)
# ... and 0 draws them at the same position.
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0), width = 0.5)
# Question: what does the position_dodge() value mean, and why does a value
# larger than `width` leave a gap?
# Tutorial's answer: it is the distance from the right edge of one bar to the
# right edge of the next, and by default it equals `width`.
###################
## Stacked bar charts
# The default position is "stack": mapping a grouping variable without
# setting `position` produces a stacked chart.
library(gcookbook)
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")
# Reverse the order of the legend entries with guides(). TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))
# Change the stacking order of the bars by changing the factor levels.
# Note: this re-levels the workspace copy of cabbage_exp, so later sections
# see the new level order too.
cabbage_exp$Cultivar <- factor(cabbage_exp$Cultivar, levels = c("c52", "c39"))
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", color = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
###################
## Adding labels
library(ggplot2)
library(gcookbook)
cabbage_exp <- cabbage_exp
# Label placement with vjust: 1.5 puts the text below the top of the bar,
# i.e. inside it, so white text is readable against the bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")
# A negative vjust puts the text above the bar. The default text colour is
# used here because white text would sit on the panel background, not on the
# bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)
# To keep labels from running off the plot, extend the y range.
# Option 1: ylim().
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)
# Option 2: offset the label's y from Weight; the plot height adapts.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1, label = Weight))
# Labels on a grouped bar chart: x is Date and Cultivar is the dodged group,
# so the text layer needs position_dodge() to follow the bars.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5, color = "white",
    position = position_dodge(0.9), size = 3
  )
# position_dodge(0.9) matches the 0.9 width that the tutorial gives as the
# default, which centres each label on its bar.
library(plyr)
# Labels on a stacked bar chart.
# ggplot2 (>= 2.2.0) stacks the first factor level on top and the last level
# at the bottom. The cumulative sum that positions the labels must therefore
# run from the last level to the first, so the levels are fixed first and the
# rows are sorted by descending Cultivar within each Date.
ce <- cabbage_exp
ce$Cultivar <- factor(ce$Cultivar, levels = c("c52", "c39"))
ce <- arrange(ce, Date, desc(Cultivar))
# label_y = top edge of each segment.
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")
# Move the labels to the middle of each segment: top edge minus half of the
# segment's own height. No vjust here, so the text is centred on label_y.
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")
# Add a unit to the labels and change the color scheme.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")), size = 4) +
  scale_fill_brewer(palette = "Pastel1")
###################
## Beyond bar charts: the Cleveland dot plot
library(gcookbook)
# Work with the first 25 rows of tophitters2001.
tophit <- tophitters2001[seq_len(25), ]

# Start from a plain scatter plot.
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

# Sort the names by avg with reorder() (same technique as for bar charts).
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) + # larger points
  theme_bw() +           # black-and-white theme
  theme(
    # no vertical grid lines (major or minor)
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # dashed horizontal grid lines
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

# Same plot with the x and y axes swapped.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) + # larger points
  theme_bw() +           # black-and-white theme
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )
# Order the names by lg first and by avg within each lg.
nameorder <- with(tophit, name[order(lg, avg)])
# Turn name into a factor whose levels follow nameorder.
tophit$name <- factor(tophit$name, levels = nameorder)

## Colored Cleveland dot plot
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  # grey segment from x = 0 to each point
  geom_segment(mapping = aes(yend = name), xend = 0, colour = "grey50") +
  # point color follows the grouping variable lg
  geom_point(mapping = aes(color = lg), size = 3) +
  # `limits` fixes the order of the color groups
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    # no horizontal grid lines
    panel.grid.major.y = element_blank(),
    # legend position as fractions of the plot area (1 = right edge)
    legend.position = c(1, 0.55),
    # anchor point inside the legend box: c(1, 0.5) is the middle of its
    # right edge; c(1, 0) would be bottom-right, c(0, 1) top-left, and so on
    legend.justification = c(1, 0.5)
  )
# Faceted version with facet_grid().
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(color = lg)) + # color follows the grouping variable
  # guide = "none" drops the legend (the facets already identify each lg);
  # the older guide = F spelling is deprecated and relies on the reassignable
  # shorthand F.
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  # One column, one row of panels per value of lg.
  # scales = "free_y" gives each panel its own y axis; space = "free_y" sizes
  # each panel's height to its content.
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")
# * 第三章:各式各样的饼图 ------------------------------------------------------------
library(ggplot2)
# Technique: building pie-like charts with ggplot.
mpg <- mpg
# Start from an ordinary bar chart of counts per class.
ggplot(data = mpg, mapping = aes(class)) +
  geom_bar()
# Bend the y direction into an angle: the bars become arcs.
ggplot(data = mpg, mapping = aes(class)) +
  geom_bar() +
  coord_polar(theta = "y")
# Bend the x direction into an angle: the bars all start from one centre.
ggplot(data = mpg, mapping = aes(class)) +
  geom_bar() +
  coord_polar(theta = "x")
# Add a fill color per drv group (theta = "y").
ggplot(data = mpg, mapping = aes(class)) +
  geom_bar(mapping = aes(fill = drv)) +
  coord_polar(theta = "y")
# Add a fill color per drv group (theta = "x").
ggplot(data = mpg, mapping = aes(class)) +
  geom_bar(mapping = aes(fill = drv)) +
  coord_polar(theta = "x")
# How to get an ordinary pie chart: stack everything in one bar at x = 1 ...
ggplot(data = mpg, mapping = aes(1, fill = class)) +
  geom_bar(width = 0.5)
# ... and then switch to polar coordinates with theta = "y".
ggplot(data = mpg, mapping = aes(1, fill = class)) +
  geom_bar(width = 0.5) +
  coord_polar(theta = "y")
# Add percentage labels. Each label is that class's count divided by the total
# count, so the labels add up to 100%; dividing by a fixed 100 would only be
# right for a data set of exactly 100 rows.
ggplot(mpg, aes(1, fill = class)) +
  geom_bar(width = 0.5) +
  coord_polar(theta = "y") +
  geom_text(
    stat = "count",
    aes(label = scales::percent(..count.. / sum(..count..))),
    # position_stack(vjust = 0.5) centres each label in its slice
    size = 3, position = position_stack(vjust = 0.5)
  )
# Exercise: how can position_stack() be used to place the labels?
# Manual approach first. plyr supplies arrange(), desc() and ddply(); it is
# attached here so this section can be run on its own.
library(plyr)
# ggplot2 (>= 2.2.0) stacks the first factor level on top, so the cumulative
# sum has to run from the last level to the first: fix the levels, then sort
# by descending Cultivar within each Date.
ce <- cabbage_exp
ce$Cultivar <- factor(ce$Cultivar, levels = c("c52", "c39"))
ce <- arrange(ce, Date, desc(Cultivar))
# label_y = middle of each segment (top edge minus half its own height).
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")
# position_stack() saves most of that code: vjust = 0.5 centres each label in
# its segment without any precomputed y position.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), position = position_stack(vjust = 0.5), colour = "white")