# 欢迎光临散文网 会员登录 & 注册

# [合集]R语言绘图(ggplot2,ggpubr)从入门到精通

# 2022-11-22 16:05 作者:请叫我小鸡崽  | 我要投稿

# [合集]R语言绘图(ggplot2,ggpubr)从入门到精通


# * 第一章:快速探索数据 ------------------------------------------------------------


###################
## Scatter plots

# Base graphics: plot(x, y) is the simplest way to draw a scatter plot.
plot(x = mtcars$wt, y = mtcars$mpg)

# qplot() accepts either free-standing vectors or bare column names plus `data`.
library(ggplot2)
qplot(x = mtcars$wt, y = mtcars$mpg)
qplot(x = wt, y = mpg, data = mtcars)

# The same figure built with the full ggplot() grammar.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()


###################
## Line and curve charts

# Base graphics: the `type` argument of plot() selects the drawing style;
# type = "l" draws a line.
plot(x = pressure$temperature, y = pressure$pressure, type = "l")
points(x = pressure$temperature, y = pressure$pressure)

# Overlay a second series (half the pressure) in red on the same plot.
lines(x = pressure$temperature, y = pressure$pressure / 2, col = "red")
points(x = pressure$temperature, y = pressure$pressure / 2, col = "red")

# qplot() with geom = "line".
library(ggplot2)
qplot(x = pressure$temperature, y = pressure$pressure, geom = "line")
# When x and y come from the same data frame, pass bare column names and `data`.
qplot(x = temperature, y = pressure, data = pressure, geom = "line")

# ggplot(): a line layer with a point layer on top.
ggplot(data = pressure, mapping = aes(x = temperature, y = pressure)) +
  geom_line() +
  geom_point()




###################
## Bar charts

# Base graphics: barplot().
# The self-assignments copy the built-in datasets into the workspace.
BOD <- BOD
mtcars <- mtcars
barplot(height = BOD$demand, names.arg = BOD$Time)

# "Bar chart" sometimes means a chart whose bars show the number of cases in
# each category. That is similar to a histogram, but with a discrete x axis;
# use table() to produce the per-category counts first.
barplot(height = mtcars$cyl)         # raw values: one bar per row
barplot(height = table(mtcars$cyl))  # counts: one bar per distinct cyl value

# qplot()
library(ggplot2)
qplot(x = mtcars$cyl)          # cyl treated as a continuous variable
qplot(x = factor(mtcars$cyl))  # cyl treated as a categorical variable

# ggplot()
ggplot(data = mtcars, mapping = aes(x = cyl)) +
  geom_bar()

# stat = "count" (the default): bar height is the number of rows at each x.
# stat = "identity": bar height is the y value supplied for each x.
# In short, "identity" reads y from the data, "count" tallies frequencies.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

# Same chart with Time converted to a factor (discrete x axis).
ggplot(data = BOD, mapping = aes(x = factor(Time), y = demand)) +
  geom_bar(stat = "identity")



###################
## Histograms
# x is a continuous variable.

# Base graphics: hist().
hist(mtcars$mpg)
hist(mtcars$mpg, breaks = 10)

# qplot(): `binwidth` sets the width of each bin.
qplot(mpg, data = mtcars, binwidth = 1)

# Equivalent ggplot() call. The binwidth matches the qplot() call above so
# that the two statements really do draw the same histogram.
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram(binwidth = 1)



###################
## Box plots

# plot() draws a box plot by default when x is a factor paired with a
# numeric y.
ToothGrowth <- ToothGrowth
plot(x = ToothGrowth$supp, y = ToothGrowth$len)

# Formula interface.
boxplot(len ~ supp, data = ToothGrowth)

# Combining two variables on the right-hand side gives a grouped box plot.
boxplot(len ~ supp + dose, data = ToothGrowth)

# qplot() box plot.
qplot(x = supp, y = len, data = ToothGrowth, geom = "boxplot")

# Interaction of supp and dose on the x axis, columns taken from `data`.
qplot(
  x = interaction(supp, dose),
  y = len,
  data = ToothGrowth,
  geom = "boxplot"
)

# Equivalent ggplot() call.
ggplot(
  data = ToothGrowth,
  mapping = aes(x = interaction(supp, dose), y = len)
) +
  geom_boxplot()



###################
## Plotting function curves

# curve() takes an expression in the variable x and draws it between
# `from` and `to`.
curve(expr = x^3 - 5 * x, from = -4, to = 4)


# A user-defined function to plot: a logistic curve centred at xvar = 10.
# `xvar` is a numeric vector; the result has the same length.
my_fun <- function(xvar) {
  shifted <- 10 - xvar
  1 / (1 + exp(shifted))
}

# Draw the user-defined function over [0, 20].
curve(my_fun(x), from = 0, to = 20)

# Add a second curve to the existing plot.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
curve(1 - my_fun(x), add = TRUE, col = "red")

# ggplot2 equivalent of the first curve: the data frame only supplies the
# x range, stat_function() evaluates my_fun across it.
ggplot(data.frame(x = c(0, 20)), aes(x = x)) +
  stat_function(fun = my_fun, geom = "line")


# Complement of the logistic curve centred at xvar = 10
# (1 minus the logistic value). `xvar` is a numeric vector.
my_fun2 <- function(xvar) {
  denominator <- 1 + exp(10 - xvar)
  1 - 1 / denominator
}

# Both curves in one ggplot: my_fun in the default colour, my_fun2 in red.
ggplot(data = data.frame(x = c(0, 20)), mapping = aes(x = x)) +
  stat_function(fun = my_fun, geom = "line") +
  stat_function(fun = my_fun2, geom = "line", color = "red")


# * 第二章:柱状图深入研究 ------------------------------------------------------------

library(gcookbook)

pg_mean <- pg_mean

# Bar fill colour (`fill`) and outline colour (`color`) set as constants.
ggplot(pg_mean, aes(group, weight)) +
  geom_bar(stat = "identity", fill = "lightblue", color = "black")

# Fill colour can also be driven by a grouping variable.
# Weight is mapped to y and drawn with stat = "identity" so that the bar
# heights show the data values instead of a row count per Date/Cultivar pair.
cabbage_exp <- cabbage_exp
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

# Aside - the `position` argument fine-tunes how geoms are placed. It is most
# often used with grouped bars, which can be drawn stacked or side by side.
# Common values ("stack" and "dodge" are used most):
#   - "identity": no adjustment; bars of a group overlap each other
#   - "stack":    stacked (the default)
#   - "fill":     stacked and scaled to proportions
#   - "dodge":    side by side
# ColorBrewer palettes are applied with scale_colour_brewer() and
# scale_fill_brewer(); RColorBrewer::display.brewer.all() shows every palette.

# Change the palette with scale_fill_brewer().
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_brewer(palette = "Pastel1")

# Display all available palettes.
RColorBrewer::display.brewer.all()




###################
## Grouped bar charts
library(gcookbook)
cabbage_exp <- cabbage_exp

# position = "dodge": bars of one Date are drawn side by side.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", color = "black")

# No `position` given: the default, position = "stack", is used.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# Besides `fill`, the grouping variable can be mapped to other aesthetics
# such as `color` or `linetype`.
ggplot(cabbage_exp, aes(Date, Weight, color = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

# Note: if a combination of the categorical variables is missing from the
# data, its bar is absent and the neighbouring bar expands to fill the space.
ce <- cabbage_exp[1:5, ]

ggplot(ce, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_brewer(palette = "Pastel1")

# In practice a category may genuinely have no y value; NA or 0 can stand in
# for it (0 is used here) so that the bar slot is kept.
ce_NA <- cabbage_exp
ce_NA$Weight[6] <- 0

ggplot(ce_NA, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_brewer(palette = "Pastel1")



###################
## Colour tricks
library(gcookbook)

# Keep the rows of uspopchange whose rank of Change is above 40.
upc <- subset(uspopchange, rank(Change) > 40)
upc

ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(stat = "identity")

# reorder() sorts the bars by their height. The column is `Change`
# (capital C); R names are case-sensitive, so `change` would not be found.
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

# Aside - reorder():
#   reorder(x, X, FUN = mean, ..., order = is.ordered(x), decreasing = FALSE)
#   x: the data to be reordered; the new order is applied to x
#   X: the values that determine the order
#
# Aside - scale_fill_manual():
#   Custom colours; supply as many values as the grouping variable has levels.
#   scale_fill_manual(..., values, aesthetics = "fill", breaks = waiver(),
#                     na.value = "grey50")


###################
## Different colours for positive and negative bars
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)

# Approach: `fill` needs a variable that tells positive from negative values,
# so add a logical column `pos` that records the sign and map fill to it.
csub$pos <- csub$Anomaly10y >= 0
csub

ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

# scale_fill_manual() sets the two colours; guide = "none" removes the legend
# (the logical form guide = FALSE is deprecated in current ggplot2).
# NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for the
# outline width; `size` is kept here for compatibility with older versions.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity", position = "identity",
    color = "black", size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

  

###################
## Bar width and spacing (the `width` argument)
library(gcookbook)

# Default bar width is 0.9.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

# Narrower bars.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)

# According to the tutorial, width should be set to at most 1.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 1)

## Spacing between the bars of a grouped bar chart

# By default the bars within one group are drawn without a gap.
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = "dodge")

# To open a gap, make the position_dodge() width larger than `width`.
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.7))

# Dodge width smaller than the bar width.
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.3))

# Dodge width of zero.
ggplot(data = cabbage_exp, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0))

# Question: what does position_dodge() mean, and why does a value larger than
# `width` leave a gap?
#   - The tutorial's answer: the position_dodge() value is the distance from
#     the right edge of one bar to the right edge of the next bar.
#   - When no value is given, the dodge width equals `width`.


 

###################
## Stacked bar charts
# `position` defaults to "stack": with a grouping variable mapped and no
# `position` given, the result is a stacked chart.
library(gcookbook)

ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# Reverse the order of the legend entries with guides().
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

# Change the stacking order of the bars by changing the factor levels.
# This overwrites cabbage_exp$Cultivar for the rest of the script.
cabbage_exp$Cultivar <- factor(cabbage_exp$Cultivar, levels = c("c52", "c39"))

ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# Outlined bars, reversed legend and a ColorBrewer palette.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", color = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")



###################
## Bar labels
library(ggplot2)
library(gcookbook)
cabbage_exp <- cabbage_exp

# Label placement is controlled by `vjust`.
# vjust = 1.5 puts the label just below the top of the bar, i.e. inside it,
# so white text is readable against the bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

# vjust = -0.2 puts the label just above the bar. The default text colour is
# used because white text would sit on the light panel background.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)

# To keep labels above the bars from running off the plot, extend the y range.
# Option 1: ylim().
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

# Option 2: offset the label's y from Weight; the y range then adapts to the
# label positions.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1, label = Weight))

# Labels on a grouped bar chart: x is Date and fill is Cultivar, so each Date
# holds two dodged bars. The text layer needs the same dodge; the default bar
# width is 0.9, so position_dodge(0.9) centres each label on its bar.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5, color = "white",
    position = position_dodge(0.9), size = 3
  )



library(plyr)

# Labels on a stacked bar chart.
# Fix the stacking order on a copy first, so the label positions do not depend
# on whether cabbage_exp$Cultivar was re-levelled earlier in the session.
ce <- cabbage_exp
ce$Cultivar <- factor(ce$Cultivar, levels = c("c52", "c39"))

# position = "stack" draws the last factor level at the bottom of the stack,
# so sort Cultivar in descending level order before accumulating: each
# label_y is then the top edge of that row's own segment.
ce <- arrange(ce, Date, desc(Cultivar))
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))

# vjust = 1.5 places each label just below the top of its segment.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")



# Move the labels to the centre of each segment.
# Fix the stacking order on a copy first, so the label positions do not depend
# on whether cabbage_exp$Cultivar was re-levelled earlier in the session.
ce <- cabbage_exp
ce$Cultivar <- factor(ce$Cultivar, levels = c("c52", "c39"))

# position = "stack" draws the last factor level at the bottom of the stack,
# so sort Cultivar in descending level order before accumulating.
# cumsum(Weight) is the top of a segment; subtracting half the segment's
# Weight gives its vertical midpoint.
ce <- arrange(ce, Date, desc(Cultivar))
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)

# No vjust here: the text is centred on label_y, which is already the
# midpoint of the segment.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")

# Add a unit to the labels and change the palette.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  scale_fill_brewer(palette = "Pastel1")



###################
## Beyond bar charts: Cleveland dot plots
library(gcookbook)

# First 25 rows of tophitters2001.
tophit <- tophitters2001[seq_len(25), ]

# Start from a plain scatter plot.
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

# Sort the names by avg with reorder() (same technique as for sorted bars).
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +  # larger points
  theme_bw() +            # white background theme
  theme(
    # remove the vertical grid lines (major and minor)
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # draw the horizontal grid lines dashed
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

# Swap the x and y axes of the plot.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +  # larger points
  theme_bw() +            # white background theme
  theme(
    # remove the horizontal grid lines (major and minor)
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    # draw the vertical grid lines dashed
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )


# Order the names by lg first and by avg within each lg.
nameorder <- tophit$name[order(tophit$lg, tophit$avg)]

# Turn name into a factor whose levels follow nameorder.
tophit$name <- factor(tophit$name, levels = nameorder)

## Coloured Cleveland dot plot
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  # one grey segment per name, running from the point to x = 0
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  # colour the points by the grouping variable lg
  geom_point(size = 3, aes(color = lg)) +
  # `limits` fixes the order in which the colours are assigned
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    # remove the horizontal grid lines
    panel.grid.major.y = element_blank(),
    # legend placement as relative coordinates of the plot
    legend.position = c(1, 0.55),
    # anchor point of the legend box: c(1, 0.5) is the middle of its right
    # edge; c(1, 0) would be the bottom-right corner, c(0, 1) the top-left
    legend.justification = c(1, 0.5)
  )

 

# Faceted version using facet_grid().
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  # colour the points by the grouping variable lg
  geom_point(size = 3, aes(color = lg)) +
  # guide = "none" removes the legend (the logical form guide = F is
  # deprecated in current ggplot2, and `F` is a reassignable variable)
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  # lg ~ . lays the panels out in one column, one row per value of lg.
  # scales = "free_y" lets each panel have its own y scale;
  # space = "free_y" lets the panel heights differ accordingly.
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")

  

  

  

# * 第三章:各式各样的饼图 ------------------------------------------------------------

library(ggplot2)

# Techniques: drawing pie-style charts with ggplot.
mpg <- mpg

# Starting point: a plain bar chart of counts per class.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar()

# Bend the y direction into a circle: the bars become arcs.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar() +
  coord_polar(theta = "y")

# Bend the x direction into a circle: every bar starts from the centre.
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar() +
  coord_polar(theta = "x")

# Add colour groups (fill by drv), theta = "y".
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(mapping = aes(fill = drv)) +
  coord_polar(theta = "y")

# Add colour groups (fill by drv), theta = "x".
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(mapping = aes(fill = drv)) +
  coord_polar(theta = "x")

# How to draw an ordinary pie chart:
# first a single stacked bar at x = 1, filled by class ...
ggplot(data = mpg, mapping = aes(x = 1, fill = class)) +
  geom_bar(width = 0.5)

# ... then the same bar in polar coordinates with theta = "y".
ggplot(data = mpg, mapping = aes(x = 1, fill = class)) +
  geom_bar(width = 0.5) +
  coord_polar(theta = "y")



# Add percentage labels.
# Each label is the class count divided by the total count, so the shares add
# up to 100% whatever the number of rows in the data (dividing by a fixed 100
# is only correct for exactly 100 rows).
# position_stack(vjust = 0.5) centres each label inside its slice.
ggplot(mpg, aes(1, fill = class)) +
  geom_bar(width = 0.5) +
  coord_polar(theta = "y") +
  geom_text(
    stat = "count",
    aes(label = scales::percent(..count.. / sum(..count..))),
    size = 3, position = position_stack(vjust = 0.5)
  )


# Homework: how can position_stack() be used to position the labels?
# For comparison, this is the manual way of centring labels in a stacked bar.
# Fix the stacking order on a copy first, so the label positions do not depend
# on whether cabbage_exp$Cultivar was re-levelled earlier in the session.
ce <- cabbage_exp
ce$Cultivar <- factor(ce$Cultivar, levels = c("c52", "c39"))

# position = "stack" draws the last factor level at the bottom of the stack,
# so sort Cultivar in descending level order before accumulating.
# cumsum(Weight) is the top of a segment; subtracting half the segment's
# Weight gives its vertical midpoint.
ce <- arrange(ce, Date, desc(Cultivar))
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)

# No vjust here: the text is centred on label_y, which is already the
# midpoint of the segment.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")



# position_stack() saves most of that code: vjust = 0.5 centres each label
# inside its own segment without precomputing label positions.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity") +
  geom_text(
    mapping = aes(label = Weight),
    position = position_stack(vjust = 0.5),
    colour = "white"
  )

# [合集]R语言绘图(ggplot2,ggpubr)从入门到精通的评论 (共 条)

# 分享到微博请遵守国家法律