涉及的内容
1. 加载文本文件
2. 基础绘图
1.Scatter
2.Line
3.Bar
4.Histogram
5.Box
6.Function
示例代码
####install R packages
> install.packages("ggplot2")
# or use the GUI's menu
####Load a package
> library(ggplot2)
####Load Text Files
#csv with header
> data <- read.csv("c:/datafile.csv")
#csv without header
> data <- read.csv("c:/datafile.csv", header=FALSE)
#Manually assign the header names
> names(data) <- c("Column1","Column2","Column3")
#text file with a custom separator (tab here)
> data <- read.csv("c:/datafile.csv", sep="\t")
#Strings are converted to factors by default only before R 4.0 (since R 4.0 the default is FALSE); set stringsAsFactors explicitly to control it
> data <- read.csv("c:/datafile.csv", stringsAsFactors=FALSE)
#check the structure of a data frame
> str(data)
#reference a column: df$col
>data$Name
> ?read.table
####Load Excel Files (a Java runtime must be installed first; saving the workbook as CSV is the simpler route)
> install.packages("xlsx") #this package has poor support for Chinese text
# the package name is "xlsx" -- it must match the name passed to install.packages()
> library(xlsx)
# read a worksheet by its position in the workbook
> data <- read.xlsx("datafile.xlsx", sheetIndex=1)
# read a worksheet by its name
> data <- read.xlsx("datafile.xls", sheetName="Scores")
######Plotting
####explore the dataset
> ?mtcars
> mtcars
####scatter plot
> plot(mtcars$wt, mtcars$mpg)
> library(ggplot2)
> qplot(mtcars$wt, mtcars$mpg)
> qplot(wt, mpg, data=mtcars) #if the vectors are in same dataframe
# is equivalent to
> ggplot(mtcars, aes(x=wt, y=mpg)) + geom_point()
####line plot
> ?pressure
> pressure
> plot(pressure$temperature, pressure$pressure, type="l")
> points(pressure$temperature, pressure$pressure)
> lines(pressure$temperature, pressure$pressure/2, col="red")
> points(pressure$temperature, pressure$pressure/2, col="red")
> library(ggplot2)
> qplot(pressure$temperature, pressure$pressure, geom="line")
> qplot(temperature, pressure, data=pressure, geom="line")
> ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()
# Lines and points together
> qplot(temperature, pressure, data=pressure, geom=c("line", "point")) #vector definition uses c()
# Equivalent to:
> ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point()
####bar plot
# pass barplot() a vector of values for the height of each bar
> barplot(BOD$demand)
# assign a vector of labels for each bar
> barplot(BOD$demand, names.arg=BOD$Time)
# visualize a category count: select a, count(*) cnt from table group by a
# 1. Create a Contingency Table
> cyl_cnt <- table(mtcars$cyl)
# 2. Draw a bar plot of the counts
> barplot(cyl_cnt)
# ggplot2 bar
> library(ggplot2)
# Bars whose heights are the y values themselves: use geom="col"
# (qplot()'s stat argument is deprecated, so stat="identity" can no longer be passed to it)
> qplot(BOD$Time, BOD$demand, geom="col")
# Convert the x variable to a factor, so that it is treated as discrete
> qplot(factor(BOD$Time), BOD$demand, geom="col")
# Pass a single vector to get a bar graph of the category counts
# cyl is continuous here
> qplot(mtcars$cyl)
# Treat cyl as discrete
> qplot(factor(mtcars$cyl))
# If the vectors are columns of the same data frame
> qplot(Time, demand, data=BOD, geom="col")
# This is equivalent to:
> ggplot(BOD, aes(x=Time, y=demand)) + geom_bar(stat="identity")
# Bar graph of counts
qplot(factor(cyl), data=mtcars)
# This is equivalent to:
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar()
####histogram
> hist(mtcars$mpg)
# Specify approximate number of bins with breaks
> hist(mtcars$mpg, breaks=10)
#use ggplot2
> qplot(mtcars$mpg)
> library(ggplot2)
> qplot(mpg, data=mtcars, binwidth=4)
# This is equivalent to:
> ggplot(mtcars, aes(x=mpg)) + geom_histogram(binwidth=4)
####boxplot
# x: factor, y: numeric; When x is a factor, it will automatically create a box plot
> plot(ToothGrowth$supp, ToothGrowth$len)
# Formula syntax
boxplot(len ~ supp, data = ToothGrowth)
# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)
# use ggplot2
> library(ggplot2)
> qplot(ToothGrowth$supp, ToothGrowth$len, geom="boxplot")
> qplot(supp, len, data=ToothGrowth, geom="boxplot")
# This is equivalent to:
> ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot()
# Using three separate vectors
> qplot(interaction(ToothGrowth$supp, ToothGrowth$dose), ToothGrowth$len, geom="boxplot")
# Alternatively, get the columns from the data frame
> qplot(interaction(supp, dose), len, data=ToothGrowth, geom="boxplot")
# This is equivalent to:
> ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()
####plot a function
# curve() draws the given expression in x over the interval [from, to]
> curve(x^3 - 5*x, from=-4, to=4)
> # Plot a user-defined function
# myfun returns 1/(1 + exp(-xvar + 10)) for its argument xvar
> myfun <- function(xvar) {
> 1/(1 + exp(-xvar + 10))
> }
> curve(myfun(x), from=0, to=20)
# Overlay a second curve on the existing plot: add = TRUE
> curve(1-myfun(x), add = TRUE, col = "red")
# use ggplot2
> library(ggplot2)
# This sets the x range from 0 to 20
# NOTE(review): qplot()'s stat argument is deprecated in recent ggplot2 releases, so this
# call may fail there -- confirm against the installed version; the ggplot() form below avoids it
> qplot(c(0,20), fun=myfun, stat="function", geom="line")
# This is equivalent to:
> ggplot(data.frame(x=c(0, 20)), aes(x=x)) + stat_function(fun=myfun, geom="line")