read.*
在 R 4.0.0 之前, read.*默认会把字符串转换成factor类型(R 4.0.0 起默认值已改为FALSE). 我们需要使用stringsAsFactors=FALSE
来防止转换.
ex:
# Read the tab-separated UFO sightings file into a data frame. The file is
# read without a header row, strings are kept as character vectors instead of
# being converted to factors, and empty fields are read as NA.
ufo <- read.delim(
  file = file.path("./ufo_awesome.tsv"),
  sep = "\t",
  header = FALSE,
  stringsAsFactors = FALSE,
  na.strings = ""
)
as.Date日期操作
ex:
# Parse the "%Y%m%d"-formatted strings in DateOccurred into Date objects.
ufo$DateOccurred <- as.Date(ufo$DateOccurred, format = "%Y%m%d")
# Keep only the sightings dated 1990-01-01 or later. which() drops rows whose
# comparison is NA, matching what subset() does.
ufo.us <- ufo.us[
  which(ufo.us$DateOccurred >= as.Date("1990-01-01")),
  ,
  drop = FALSE
]
将对应列转换为特定的日期类型.
日期转化strftime
# Reduce each sighting date to a "year-month" string (format "%Y-%m") and
# store it in a new YearMonth column.
ufo.us[["YearMonth"]] <- strftime(ufo.us[["DateOccurred"]], format = "%Y-%m")
捕获异常
# Split a "City, State" location string into its two components.
#
# Args:
#   l: A single location string, expected to look like "City, State".
#
# Returns:
#   A character vector of length 2, c(city, state), with one leading space
#   removed from each piece. Returns c(NA, NA) (as character) whenever the
#   input does not split into exactly two comma-separated pieces, or cannot
#   be split at all.
get.location <- function(l) {
  missing.pair <- c(NA_character_, NA_character_)

  # strsplit() raises an error for non-character input (e.g. a logical NA);
  # treat that as an unparseable location instead of aborting.
  split.location <- tryCatch(
    strsplit(l, ",")[[1]],
    error = function(e) missing.pair
  )

  # Some entries have a space at the start of a piece; strip it with a regex.
  clean.location <- gsub("^ ", "", split.location)

  # strsplit() does NOT fail on strings without a comma, so the tryCatch alone
  # cannot enforce the (city, state) shape. Reject everything that is not
  # exactly two pieces -- no comma, empty string, character NA, or extra
  # commas -- so the result always has length 2.
  if (length(clean.location) != 2) {
    return(missing.pair)
  }
  clean.location
}
transform添加新列
# Add two columns to ufo: USCity taken from the first column of
# location.matrix and USState taken from its second column.
# NOTE(review): stringsAsFactors = FALSE is presumably forwarded by
# transform() to data.frame() so the new columns stay character rather than
# becoming factors -- confirm against the R version in use.
ufo <- transform(ufo,
USCity = location.matrix[, 1],
USState = location.matrix[, 2],
stringsAsFactors = FALSE)
ggplot2画直方图ggsave保存
# Histogram of sighting dates, with x-axis breaks requested every 50 years.
quick.hist <- ggplot(data = ufo.us, mapping = aes(x = DateOccurred)) +
  geom_histogram() +
  scale_x_date(breaks = "50 years")

# Write the histogram to images/quick_hist.pdf (width 8, height 6).
ggsave(
  filename = file.path("images", "quick_hist.pdf"),
  plot = quick.hist,
  width = 8,
  height = 6
)
ggplot2多行列图
# Line chart of Sightings against YearMonth, drawn as one panel per State.
state.plot <- ggplot(
  data = all.sightings,
  mapping = aes(x = YearMonth, y = Sightings)
) +
  # Draw the series as a line; the colour is mapped to the constant
  # "darkblue" and resolved by the manual scale below (legend hidden).
  geom_line(aes(color = "darkblue")) +
  scale_color_manual(values = c("darkblue" = "darkblue"), guide = "none") +
  # One panel per value of State, laid out on a 10 x 5 grid.
  facet_wrap(~State, nrow = 10, ncol = 5) +
  # Break the x axis every 5 years and label each break with the year only.
  scale_x_date(breaks = "5 years", labels = date_format("%Y")) +
  labs(
    x = "Years",
    y = "Number of Sightings",
    title = "Number of UFO sightings by Month-Year and U.S. State (1990-2010)"
  ) +
  # White background with dark grid lines.
  theme_bw()

# Save the faceted plot as a PDF (width 14, height 8.5).
ggsave(
  filename = file.path("images", "ufo_sightings.pdf"),
  plot = state.plot,
  height = 8.5,
  width = 14
)
ggplot2
操作的必须是数据框, scale_x_date
函数将x轴标签的时间周期改为50年.
ggsave
把可视化结果保存在文件里
merge 多列合并
# Join states.dates with sightings.counts on two key columns:
# (s, date.strings) on the left against (USState, YearMonth) on the right.
# all = TRUE keeps unmatched rows from both inputs.
all.sightings <- merge(
  x = states.dates,
  y = sightings.counts,
  by.x = c("s", "date.strings"),
  by.y = c("USState", "YearMonth"),
  all = TRUE
)