# Visualizing data in R (box plot, histogram, violin plot, scatter plot)
## Load data
## Attach the AzureML client package, create a workspace object with the
## default settings, and download the raw automobile price dataset from it.
library(AzureML)
ws <- workspace()
auto.price <- download.datasets(ws, "Automobile price data (Raw)")
## Coerce some character columns to numeric
cols <- c('price', 'bore', 'stroke', 'horsepower', 'peak.rpm')
## Replace the '?' placeholder (unknown value) with NA and convert the
## strings to numbers in a single pass over the selected columns.
## Factor columns are turned into character vectors first: ifelse() and
## as.numeric() applied to a factor yield its integer level codes, not the
## values the labels spell out.
auto.price[cols] <- lapply(auto.price[cols], function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  ## %in% never returns NA, so values that are already missing stay as they are
  x[x %in% '?'] <- NA
  as.numeric(x)
})
## Remove rows with an NA in any column
auto.price <- auto.price[complete.cases(auto.price), ]
## Add a log transformed (natural log) column for price
auto.price$lnprice <- log(auto.price$price)
## Consolidate