大数据 | 数据挖掘 | R语言 R绘图Session#1 - 基础

本文介绍如何使用R语言进行基础的数据挖掘和绘图操作,包括加载文本文件和绘制基本图表,通过示例代码展示了R语言在数据可视化方面的应用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

涉及的内容

1. 加载文本文件

2. 基础绘图

1.Scatter
2.Line
3.Bar
4.Histogram
5.Box
6.Function

示例代码

#### Install R packages
# Download and install ggplot2, the package that provides the
# qplot()/ggplot() calls used in the plotting examples of this session.
> install.packages("ggplot2")
# or use the GUI's menu

#### Load a package
# Attach the installed package so its functions can be called by name.
> library(ggplot2)

#### Load Text Files
# Each example reads a delimited text file into a data frame named `data`.

# csv with a header row (the first line supplies the column names)
> data <- read.csv("c:/datafile.csv")

# csv without a header row
> data <- read.csv("c:/datafile.csv", header=FALSE)
# Manually assign the header names
> names(data) <- c("Column1","Column2","Column3")

# text file with a different separator (here: tab)
> data <- read.csv("c:/datafile.csv", sep="\t")

# Control whether string columns are converted to factors.
# Conversion was the default before R 4.0; stringsAsFactors=FALSE keeps
# the columns as character vectors.
> data <- read.csv("c:/datafile.csv", stringsAsFactors=FALSE)

# check the structure of a data frame
> str(data)
# reference a column: df$col
>data$Name
# open the help page that documents read.table and its variants
> ?read.table

#### Load Excel Files
# The xlsx package needs a JRE installed first. If that is a problem,
# save the Excel file as a csv file and load it with read.csv() instead.
> install.packages("xlsx") #this package has poor support for Chinese text
# The package name is "xlsx" -- it must match the name that was installed.
> library(xlsx)
# Select the worksheet by position ...
> data <- read.xlsx("datafile.xlsx", sheetIndex=1)
# ... or by name
> data <- read.xlsx("datafile.xls", sheetName="Scores")

###### Plotting
#### Explore the dataset
# ?mtcars opens the help page for the dataset; typing its name prints it.
> ?mtcars
> mtcars
#### Scatter plot
# Base graphics: pass the x vector (wt) and the y vector (mpg) to plot().
> plot(mtcars$wt, mtcars$mpg)

# ggplot2: qplot() accepts the same two vectors.
# NOTE(review): qplot() is reported as deprecated in recent ggplot2
# releases -- confirm against the installed version; the ggplot() form at
# the end of this section is the long-hand equivalent.
> library(ggplot2)
> qplot(mtcars$wt, mtcars$mpg)
> qplot(wt, mpg, data=mtcars) #if the vectors are in the same data frame
# is equivalent to
> ggplot(mtcars, aes(x=wt, y=mpg)) + geom_point()

#### Line plot
# ?pressure opens the help page for the dataset; typing its name prints it.
> ?pressure
> pressure
# Base graphics: type="l" draws a line instead of points.
> plot(pressure$temperature, pressure$pressure, type="l")
# points() and lines() add to the existing plot instead of starting a new
# one: first the markers for the original series, then a second series
# (pressure halved) drawn in red as a line plus markers.
> points(pressure$temperature, pressure$pressure)
> lines(pressure$temperature, pressure$pressure/2, col="red")
> points(pressure$temperature, pressure$pressure/2, col="red")

# ggplot2: three ways to write the same line plot
> library(ggplot2)
> qplot(pressure$temperature, pressure$pressure, geom="line")
> qplot(temperature, pressure, data=pressure, geom="line")
> ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line()

# Lines and points together
> qplot(temperature, pressure, data=pressure, geom=c("line", "point")) #a vector is built with c()
# Equivalent to:
> ggplot(pressure, aes(x=temperature, y=pressure)) + geom_line() + geom_point()

#### Bar plot
# pass barplot() a vector of values for the height of each bar
> barplot(BOD$demand)
# assign a vector of labels for each bar
> barplot(BOD$demand, names.arg=BOD$Time)

# visualize a category count: select a, count(*) cnt from table group by a
# 1. Create a contingency table (the count of rows for each cyl value)
> cyl_cnt <- table(mtcars$cyl)
# 2. Draw one bar per count
> barplot(cyl_cnt)

# ggplot2 bar
# NOTE(review): the stat argument of qplot() was deprecated in ggplot2
# 2.0.0, so the qplot(..., stat="identity") calls in this section may warn
# or fail on current releases -- confirm against the installed version.
# The ggplot() + geom_bar(stat="identity") form shown further down does
# not rely on it.
> library(ggplot2)
> qplot(BOD$Time, BOD$demand, geom="bar", stat="identity")
# Convert the x variable to a factor, so that it is treated as discrete
> qplot(factor(BOD$Time), BOD$demand, geom="bar", stat="identity")

# pass one vector to calculate the category counts
# cyl is continuous here
> qplot(mtcars$cyl)
# Treat cyl as discrete
> qplot(factor(mtcars$cyl))

# if the vectors are in a data frame
> qplot(Time, demand, data=BOD, geom="bar", stat="identity")
# This is equivalent to:
> ggplot(BOD, aes(x=Time, y=demand)) + geom_bar(stat="identity")

# Bar graph of counts
qplot(factor(cyl), data=mtcars)
# This is equivalent to:
ggplot(mtcars, aes(x=factor(cyl))) + geom_bar()

#### Histogram

# Base graphics: histogram of a single numeric vector
> hist(mtcars$mpg)
# Specify approximate number of bins with breaks
> hist(mtcars$mpg, breaks=10)

# use ggplot2
# qplot() is given only one vector here (no y).
> qplot(mtcars$mpg)
> library(ggplot2)
# binwidth sets the width of each bin, in the units of mpg
> qplot(mpg, data=mtcars, binwidth=4)
# This is equivalent to:
> ggplot(mtcars, aes(x=mpg)) + geom_histogram(binwidth=4)

#### Boxplot
# x: factor, y: numeric; When x is a factor, it will automatically create a box plot
> plot(ToothGrowth$supp, ToothGrowth$len)

# Formula syntax: numeric response on the left of ~, grouping on the right
boxplot(len ~ supp, data = ToothGrowth)
# Put interaction of two variables on x-axis
boxplot(len ~ supp + dose, data = ToothGrowth)

# use ggplot2
> library(ggplot2)
# Using two separate vectors
> qplot(ToothGrowth$supp, ToothGrowth$len, geom="boxplot")

# Alternatively, get the columns from the data frame
> qplot(supp, len, data=ToothGrowth, geom="boxplot")
# This is equivalent to:
> ggplot(ToothGrowth, aes(x=supp, y=len)) + geom_boxplot()

# Using three separate vectors; interaction() combines supp and dose into
# a single grouping for the x-axis
> qplot(interaction(ToothGrowth$supp, ToothGrowth$dose), ToothGrowth$len, geom="boxplot")

# Alternatively, get the columns from the data frame
> qplot(interaction(supp, dose), len, data=ToothGrowth, geom="boxplot")
# This is equivalent to:
> ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()

#### Plot a function
# curve() takes an expression written in terms of x and draws it over the
# range given by from/to.
> curve(x^3 - 5*x, from=-4, to=4)

> # Plot a user-defined function
# myfun(xvar) returns 1/(1 + exp(-xvar + 10)); it equals 0.5 at xvar = 10.
> myfun <- function(xvar) {
>     1/(1 + exp(-xvar + 10))
> } 
> curve(myfun(x), from=0, to=20)
# Add a line: add = TRUE draws on the existing plot instead of a new one
> curve(1-myfun(x), add = TRUE, col = "red")

# use ggplot2
> library(ggplot2)
# This sets the x range from 0 to 20
# NOTE(review): the stat argument of qplot() was deprecated in ggplot2
# 2.0.0, so this call may warn or fail on current releases -- confirm
# against the installed version; the ggplot() form below does not rely
# on it.
> qplot(c(0,20), fun=myfun, stat="function", geom="line")
# This is equivalent to:
> ggplot(data.frame(x=c(0, 20)), aes(x=x)) + stat_function(fun=myfun, geom="line")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值