title: "Data visualization manual of R"
author: "hardy"
date: "2020"
output:
  pdf_document: default
  html_document: default
数据可视化手册代码:数据探索性分析主要包括条形图,直方图,散点直线图、箱线图以及自制函数图。我们从条形图开始。首先了解绘图区域,绘图行为和ggplot2,参见R绘图系统。
绘图行为:颜色(bg,fg,col)
线条(lty,lwd,solid,dashed)
文本(ps,cex,font,family,adj)
数据符号(pch,type)
坐标轴(axis)
ggplot:数据:data
几何对象和图形属性:geom_point,geom_line,geom_path,
aes(aesthetics)映射到图形属性
标度:scale_x_continuous,scale_y_continuous
统计变换:stat,stat="identity"
图例:shape
位置调整:position="fill"等
坐标变换:coord_trans
分面:facet_wrap
主题:theme
注释:geom_hline,geom_text
本文的翻译过程将不间断持续更新,在提升自己能力的同时,希望对大家学习R语言绘图有帮助。
# Global knitr chunk option: echo = TRUE shows each chunk's source code in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
条形图
简单条形图
连续型变量
# Load the example data sets (gcookbook) and the plotting library (ggplot2).
# The former `rm(list = ls())` call was removed: it silently wipes the user's
# workspace when this code is run in an existing session and brings no benefit
# in a fresh R session.
library(gcookbook)
library(ggplot2)

# Preview the data, then draw a basic bar chart: one bar per group, with the
# bar height taken directly from the weight column (stat = "identity").
head(pg_mean)
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity")
使用factor转为离散型变量
# Time mapped as-is: x is treated as a continuous variable.
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

# Time wrapped in factor(): x becomes a discrete variable.
bod_discrete <- ggplot(BOD, aes(x = factor(Time), y = demand))
bod_discrete +
  geom_bar(stat = "identity")

# Same discrete chart with a light-blue fill and black bar outlines.
bod_discrete +
  geom_bar(stat = "identity", fill = "lightblue", color = "black")
簇状条形图,对比缺少位置调整(position)时的不同
# Grouped bars: position = "dodge" places the Cultivar bars side by side
# within each Date.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

# For comparison: the same chart without a position argument, so geom_bar()
# uses its default position instead of dodging.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# Dodged bars with black outlines and a ColorBrewer pastel fill palette.
# The palette is named "Pastel1" (ending in the digit one); the previous
# spelling "Pastell" is not a ColorBrewer palette name.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  scale_fill_brewer(palette = "Pastel1")
绘制频数条形图
# No y aesthetic and no stat given: geom_bar() uses its default stat for the
# discrete variable cut.
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

# carat is continuous, so the values are binned (stat = "bin") before drawing.
ggplot(diamonds, aes(x = carat)) +
  geom_bar(stat = "bin")
条形图着色
# Preview the data, then keep only the rows whose Change ranks above 40.
head(uspopchange)
upc <- subset(uspopchange, rank(Change) > 40)

# Bars sorted by Change, filled by Region with two manually chosen colours,
# outlined in black; the x axis is relabelled "State".
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")
正负条形图着色
# Berkeley records from 1900 onwards, plus a logical flag telling whether the
# 10-year anomaly is non-negative.
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
csub$pos <- csub$Anomaly10y >= 0

# Fill the bars by the sign flag.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

# Same chart with thin black outlines, custom fill colours and no legend.
# guide = "none" replaces the deprecated guide = FALSE.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity", position = "identity",
    colour = "black", size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")
调整条形图宽度和间距
# Same bar chart drawn with two different bar widths (0.5 and 1).
pg_bars <- ggplot(pg_mean, aes(x = group, y = weight))
pg_bars +
  geom_bar(stat = "identity", width = 0.5)
pg_bars +
  geom_bar(stat = "identity", width = 1)
绘制堆积条形图
# Stacked bars (fill mapped to Cultivar, no position override) with the
# legend entries listed in reverse order.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))
library(plyr)
# Stacked bars with the stacking order reversed relative to the legend.
# The old `order = desc(Cultivar)` aesthetic is not used by current ggplot2
# (it is silently ignored), so the reversal is requested explicitly through
# position_stack(reverse = TRUE).
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE))
library(plyr)
# Stacked bars with black outlines, a reversed legend and a ColorBrewer pastel
# palette. The palette is named "Pastel1" (digit one); "Pastell" is not a
# ColorBrewer palette name.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
绘制堆积百分比条形图
# Within each Date, express every Weight as a percentage of that Date's total.
ce <- ddply(
  cabbage_exp, "Date", transform,
  percent_weight = Weight / sum(Weight) * 100
)

# Stacked bars of the percentages.
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")
# Percent-stacked bars with black outlines, a reversed legend and a
# ColorBrewer pastel palette. The palette is named "Pastel1" (digit one);
# "Pastell" is not a ColorBrewer palette name.
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
添加数据标签
# One bar per Date/Cultivar combination; shared by both labelled variants.
cabbage_bars <- ggplot(
  cabbage_exp,
  aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity")

# White labels shifted with vjust = 1.5.
cabbage_bars +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

# Black labels shifted the other way with vjust = -0.5.
cabbage_bars +
  geom_text(aes(label = Weight), vjust = -0.5, colour = "black")
堆积条形图的数据标签
# Compute the vertical midpoint of every stacked segment.
# ggplot2 (>= 2.2.0) stacks groups in reverse order, i.e. the last Cultivar
# level sits at the bottom of the bar. The running total must therefore be
# accumulated in descending Cultivar order; sorting ascending (as before) puts
# each label inside the wrong segment.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)

# Stacked bars with a white label centred in each segment.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "White")

# NOTE(review): this second plot is identical to the one above; it is kept so
# the number of figures produced does not change.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "White")
# Stacked bars with black outlines and labels formatted as "<weight> kg"
# (at least two decimals), placed at the precomputed label_y positions.
# The ColorBrewer palette is named "Pastel1" (digit one); "Pastell" is not a
# ColorBrewer palette name.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
绘制cleveland点图
# Work with the first 25 rows of tophitters2001.
tophit <- tophitters2001[1:25, ]

# Plain dot plot: batting average on x, player name on y.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_point()

# Names reordered by avg on the y axis, larger points, black-and-white theme.
ggplot(tophit, aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.y = element_blank(), # major grid lines
    panel.grid.minor.y = element_blank(), # minor grid lines
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )

# Axes swapped: reordered names on x (labels rotated 60 degrees), avg on y.
ggplot(tophit, aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.y = element_blank(), # major grid lines
    panel.grid.minor.y = element_blank(), # minor grid lines
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )
火柴棒图
# Lollipop chart: a grey segment from x = 0 to each point, with the points
# coloured by lg. The legend is positioned inside the plot area through
# legend.position / legend.justification.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_color_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    legend.position = c(1, 0.55),
    legend.justification = c(1, 0.5)
  )
# Lollipop chart split into one facet row per lg value, each with its own
# y scale and panel height (scales / space = "free_y").
# The colour legend is hidden with guide = "none", which replaces the
# deprecated guide = FALSE.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_color_brewer(
    palette = "Set1", limits = c("NL", "AL"), guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")
折线图
# Basic line chart.
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line()

# Same line chart with an explicit single group (group = 1); this base plot is
# reused by the variants below.
bod_line <- ggplot(BOD, aes(x = Time, y = demand, group = 1)) +
  geom_line()
bod_line

# Fix the y axis range to [0, max(demand)].
bod_line +
  ylim(0, max(BOD$demand))

# Expand the y axis so that it includes 0.
bod_line +
  expand_limits(y = 0)

# Add a point at every observation.
bod_line +
  geom_point()

# Line plus points for the worldpop data.
ggplot(worldpop, aes(x = Year, y = Population)) +
  geom_line() +
  geom_point()
绘制多重折线图
# Mean tooth length for every supp/dose combination.
tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length = mean(len))

# One line per supp, distinguished by colour.
ggplot(tg, aes(x = dose, y = length, colour = supp)) +
  geom_line()

# One line per supp, distinguished by line type.
ggplot(tg, aes(x = dose, y = length, linetype = supp)) +
  geom_line()

# factor() turns the continuous dose into a factor; the explicit group = supp
# keeps one line per supp.
ggplot(tg, aes(x = factor(dose), y = length, colour = supp, group = supp)) +
  geom_line()
# Point shape mapped to supp. The constant shape = 2 previously passed to
# geom_point() overrode this mapping (every point became the same triangle),
# so it has been removed.
ggplot(tg, aes(x = dose, y = length, shape = supp)) +
  geom_line() +
  geom_point(size = 4)

# Point fill mapped to supp. Only point shapes 21-25 have a fill, so shape 21
# is used instead of the unfillable shape 2.
ggplot(tg, aes(x = dose, y = length, fill = supp)) +
  geom_line() +
  geom_point(size = 4, shape = 21)

# Dodge both the lines and the points by 0.2 so that they stay connected.
# shape = 21 is set so the mapped fill is visible on the points.
ggplot(tg, aes(x = dose, y = length, fill = supp)) +
  geom_line(position = position_dodge(0.2)) +
  geom_point(position = position_dodge(0.2), size = 4, shape = 21)
修改线条样式
# Dashed blue line of width 1.
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line(linetype = "dashed", size = 1, color = "blue")

# Mean tooth length per supp/dose combination, drawn as one line per supp
# using the ColorBrewer "Set1" palette.
tg <- ddply(
  ToothGrowth, c("supp", "dose"), summarise,
  length = mean(len)
)
ggplot(tg, aes(x = dose, y = length, colour = supp)) +
  geom_line() +
  scale_color_brewer(palette = "Set1")
修改数据标记样式
# Shared base: line chart of demand over Time.
bod_markers <- ggplot(BOD, aes(x = Time, y = demand)) +
  geom_line()

# Shape 22 markers with a dark-red outline and pink fill.
bod_markers +
  geom_point(size = 4, shape = 22, colour = "darkred", fill = "pink")

# Shape 21 markers with a dark-red outline and white fill.
bod_markers +
  geom_point(size = 4, shape = 21, colour = "darkred", fill = "white")
绘制面积图geom_area
# Convert the sunspot.year time series into a data frame with one row per year.
sunspotyear <- data.frame(
  Year = as.numeric(time(sunspot.year)),
  sunspots = as.numeric(sunspot.year)
)

# Basic area chart.
ggplot(sunspotyear, aes(x = Year, y = sunspots)) +
  geom_area()

# Semi-transparent blue area with a black outline.
# The argument is spelled `alpha`; the previous `alpah` was an unknown
# parameter, so no transparency was applied.
ggplot(sunspotyear, aes(x = Year, y = sunspots)) +
  geom_area(color = "black", alpha = 0.2, fill = "blue")

# Semi-transparent blue area without an outline, with a line drawn on top.
ggplot(sunspotyear, aes(x = Year, y = sunspots)) +
  geom_area(alpha = 0.2, fill = "blue") +
  geom_line()
绘制堆积面积图
library(gcookbook)
# Stacked area chart: one filled area per AgeGroup.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area()

# Thin black outlines, semi-transparent fills from the "Blues" palette, and
# the legend reversed by supplying the AgeGroup levels in reverse as breaks.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(colour = "black", size = 0.2, alpha = 0.4) +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  )
# Stacked area chart with the stacking order reversed.
# The old `order = desc(AgeGroup)` aesthetic is not used by current ggplot2
# (it is silently ignored), so the reversal is requested explicitly through
# position_stack(reverse = TRUE).
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(
    position = position_stack(reverse = TRUE),
    colour = "black", size = 0.2, alpha = 0.4
  ) +
  scale_fill_brewer(palette = "Blues")

# Same reversed stacking, but with no outline on the areas (colour = NA) and
# a thin stacked line drawn on top of each area instead. The line layer uses
# the same reversed stacking so that it follows the area edges.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(
    position = position_stack(reverse = TRUE),
    colour = NA, alpha = 0.4
  ) +
  scale_fill_brewer(palette = "Blues") +
  geom_line(position = position_stack(reverse = TRUE), size = 0.2)
百分比堆积面积图
library(plyr)
# Within each Year, express every age group as a percentage of that Year's
# total population.
uspopage_prop <- ddply(
  uspopage, "Year", transform,
  Percent = Thousands / sum(Thousands) * 100
)

# Plot from the data frame that actually holds Percent. The previous version
# used `uspopage` as data and pulled y from `uspopage_prop$Percent`, which is
# only correct while both data frames happen to share the same row order.
ggplot(uspopage_prop, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(colour = "black", size = 0.2, alpha = 0.4) +
  scale_fill_brewer(
    palette = "Blues",
    breaks = rev(levels(uspopage$AgeGroup))
  )
添加置信区间
library(gcookbook)
# Berkeley rows only, restricted to the year, the 10-year anomaly and its
# uncertainty column.
clim <- subset(
  climate,
  Source == "Berkeley",
  select = c("Year", "Anomaly10y", "Unc10y")
)

# Anomaly line with a semi-transparent band of +/- Unc10y around it.
ggplot(clim, aes(x = Year, y = Anomaly10y)) +
  geom_ribbon(
    aes(ymin = Anomaly10y - Unc10y, ymax = Anomaly10y + Unc10y),
    colour = "grey50", alpha = 0.2
  ) +
  geom_line()