R语言操作练习

(1)加载nycflights13包,观察flights数据表行列数及数据结构
(2)观察airlines表格行列数数据结构、观察planes表格行列数数据结构、观察airports表格行列数数据结构
(3)通过R语言自带的帮助文档,加深对flights表格中数据的了解
(4)筛选出在1月1日出发的航班
(5)筛选非1月1日出发的航班
(6)筛选出3月19日15:00前出发的航班
(7)按照日期对表格进行升序排列(year,month,day)
(8)按照航班延迟到达时间进行降序排列(提示:arr_delay)
(9)选择年月日三列
(10) 选择非年月日外的所有列
(11)选择航班到达、实际到达、执飞航班、出发地四列
(12) 将表格中tailnum的列更名为tail num
(13)增加一个新的列,名称为gain,它等于arr_delay和dep_delay之差
(14)求dep_delay这一列的平均值,若包含缺失值,将缺失值替代为0
(16)随机抽取任意10个记录、随机抽取1%的记录
(17)选取表格的第1列
(18)选取表格中列名称包含dep开头的列
(19)选取表格中从year到day之间的所有列
(20)选取flights表格中的year、month、day、hour、origin、tailnum、carrier列,并将其与airlines表格进行左连接操作
(21)选取flights表格中的year、month、day、hour、origin、tailnum、carrier列,并将其与planes表格进行左连接操作
(22)选取flights表格中的year、month、day、hour、origin、tailnum、carrier列,并将其与airports表格进行左连接操作
(23)对flights表格,按照tailnum分组,然后求每一组记录的数量、平均距离(distance)、平均延迟到达时间(arr_delay),最后筛选出记录数大于20条、平均距离小于2000的记录,若包含缺失值,将缺失值替代为0
25.筛选出carrier为"DL"、执飞飞机tailnum为D942DN的行
26.筛选出执飞飞机tailnum为N0EGMQ、flight为4610的航班
27.筛选出飞行距离小于80km的航班
28.筛选出飞行距离介于200-800km的航班
29.筛选出出发地为纽约拉瓜迪亚机场(LGA)的航班
30.筛选出飞行时间(air_time)大于4小时的长途航班
31.同时筛选出carrier为EV,flight为6181,tailnum为N12136,origin为EWR,dest为IAD的航班
32.同时筛选出飞行时间大于4小时、hour大于8小时,距离大于100km的航班
以下数据针对Planes数据框开展
33.筛选出制造商为波音公司的飞机
34.筛选出制造商为空中客车公司的飞机
35.筛选出制造商为巴西航空工业公司的飞机
36.筛选出机型为A320的飞机
37.筛选出机型为A737的飞机
38.联合筛选波音公司、机型767的飞机
39.联合筛选空客公司、机型A320的飞机
40.联合筛选波音公司、机型767、座位数量小于50的飞机
41.按飞机尾号"tailnum"左连接合并planes和flights
42.以月为group,求各个月平均延误时间、平均到达延误时间、飞行时长、距离

# (1) Load the data and dplyr, then report the shape and structure of `flights`.
library(nycflights13)
library(dplyr)
# An explicit print() keeps the dimensions visible under source() as well as
# Rscript; the additional bare dim() calls only repeated the same output.
print(dim(flights))
# str() writes its report itself and returns NULL, so wrapping it in print()
# only appended a stray "NULL" line after every structure dump.
str(flights)
# (2) Same inspection for the airlines, planes and airports tables.
print(dim(airlines))
str(airlines)
print(dim(planes))
str(planes)
print(dim(airports))
str(airports)
# (3) Open the built-in help page describing the `flights` columns.
?flights
# (4) Flights on January 1 (filter() is used for consistency with the
# rest of the script instead of base subset()).
f_j1 <- flights %>% filter(month == 1 & day == 1)
print(f_j1)
# (5) Every flight except those on January 1.
f_nj1 <- flights %>% filter(!(month == 1 & day == 1))
print(f_nj1)
# (6) Flights on March 19 with `hour` below 15.
# NOTE(review): `hour` is presumably the scheduled departure hour; use
# `dep_time < 1500` if the actual departure time is meant -- confirm.
f_m19b15 <- flights %>% filter(month == 3 & day == 19 & hour < 15)
print(f_m19b15)
# (7) Sort ascending by date.
f_asc <- flights %>% arrange(year, month, day)
print(f_asc)
# (8) Sort by arrival delay, largest first.
f_ad_desc <- flights %>% arrange(desc(arr_delay))
print(f_ad_desc)
# (9) Keep only the three date columns.
f_ymd <- flights %>% select(year, month, day)
print(f_ymd)
# (10) Keep everything except the three date columns.
f_oth <- flights %>% select(-year, -month, -day)
print(f_oth)
# (11) Arrival time, scheduled arrival time, tail number and origin.
f_sel <- flights %>% select(arr_time, sched_arr_time, tailnum, origin)
print(f_sel)
# (12) Rename `tailnum` to `tail num` (backticks allow the space).
f_tn <- flights %>% rename(`tail num` = tailnum)
print(f_tn)
# (13) Add `gain`, the difference between arrival and departure delay.
f_gn <- flights %>% mutate(gain = arr_delay - dep_delay)
print(f_gn)
# (14) Mean departure delay, first ignoring NAs, then counting them as 0.
m_dd <- mean(flights$dep_delay, na.rm = TRUE)
print(m_dd)
# The zero-filled values live in their own vector: assigning into
# flights$dep_delay would create a modified global copy of `flights` and
# silently turn NA into 0 for every later exercise (e.g. monthly averages).
dep_delay_filled <- replace(flights$dep_delay, is.na(flights$dep_delay), 0)
m_dd0 <- mean(dep_delay_filled)
print(m_dd0)
# (16) Draw 10 random rows, then a random 1% of all rows.
s_10 <- flights %>% sample_n(10)
print(s_10)
s_1p <- flights %>% sample_frac(0.01)
print(s_1p)
# (17) First column only.
f_fc <- flights %>% select(1)
print(f_fc)
# (18) Columns whose names start with "dep".
f_dep <- flights %>% select(starts_with("dep"))
print(f_dep)
# (19) All columns from `year` through `day`.
f_yd <- flights %>% select(year:day)
print(f_yd)
# (20) Selected flight columns left-joined to `airlines` on carrier code.
f_al <- flights %>%
  select(year, month, day, hour, origin, tailnum, carrier) %>%
  left_join(airlines, by = "carrier")
print(f_al)
# (21) Selected flight columns left-joined to `planes` on tail number.
f_pl <- flights %>%
  select(year, month, day, hour, origin, tailnum, carrier) %>%
  left_join(planes, by = "tailnum")
print(f_pl)
# (22) Selected flight columns left-joined to `airports`; the origin code
# in `flights` corresponds to the `faa` key in `airports`.
f_ap <- flights %>%
  select(year, month, day, hour, origin, tailnum, carrier) %>%
  left_join(airports, by = c("origin" = "faa"))
print(f_ap)
# (23) Per-aircraft record count, mean distance and mean arrival delay,
# restricted to aircraft with more than 20 records and mean distance < 2000.
f_grp <- flights %>%
  group_by(tailnum) %>%
  summarise(cnt = n(),
            avg_d = mean(distance, na.rm = TRUE),
            avg_a = mean(arr_delay, na.rm = TRUE)) %>%
  filter(cnt > 20, avg_d < 2000)
print(f_grp)
# mean(..., na.rm = TRUE) yields NaN for a group whose values are all NA,
# and is.na() is TRUE for NaN. Both summary columns are filled with 0;
# previously only avg_d was handled and avg_a kept its missing values.
f_grp$avg_d[is.na(f_grp$avg_d)] <- 0
f_grp$avg_a[is.na(f_grp$avg_a)] <- 0
print(f_grp)
# (25) Delta flights operated by aircraft D942DN.
f_dldn <- flights %>% filter(carrier == "DL" & tailnum == "D942DN")
print(f_dldn)
# (26) Flight 4610 operated by aircraft N0EGMQ. The tail number contains the
# digit zero; the previous literal "NOEGMQ" (letter O) could not match any row.
f_n46 <- flights %>% filter(tailnum == "N0EGMQ" & flight == 4610)
print(f_n46)
# (27) Flights with distance below 80.
# NOTE(review): the exercise text says km, while nycflights13 documents
# `distance` in miles; thresholds are kept as written -- confirm the unit.
f_80 <- flights %>% filter(distance < 80)
print(f_80)
# (28) Flights with distance between 200 and 800 inclusive (same unit caveat).
f_28 <- flights %>% filter(distance >= 200 & distance <= 800)
print(f_28)
# (29) Flights departing from LGA.
f_lga <- flights %>% filter(origin == "LGA")
print(f_lga)
# (30) Long-haul flights: `air_time` above 240, i.e. 4 hours in minutes.
f_lt <- flights %>% filter(air_time > 240)
print(f_lt)
# (31) One specific ExpressJet flight, matched on five columns at once.
f_ev <- flights %>%
  filter(carrier == "EV" & flight == 6181 & tailnum == "N12136" &
           origin == "EWR" & dest == "IAD")
print(f_ev)
# (32) Air time above 4 hours, `hour` above 8 and distance above 100.
f_lhd <- flights %>% filter(air_time > 240 & hour > 8 & distance > 100)
print(f_lhd)
# Filters on the `planes` table. In nycflights13 the manufacturer values are
# upper case (e.g. "BOEING", "AIRBUS INDUSTRIE") and model codes carry a
# variant suffix (e.g. "A320-214", "767-332"), so exact comparisons such as
# manufacturer == "Boeing" or model == "A320" returned no rows. Manufacturers
# are therefore matched case-insensitively and models by prefix.

# (33) Boeing aircraft.
p_bng <- planes %>%
  filter(grepl("boeing", manufacturer, ignore.case = TRUE))
print(p_bng)
# (34) Airbus aircraft (covers both "AIRBUS" and "AIRBUS INDUSTRIE").
p_ab <- planes %>%
  filter(grepl("airbus", manufacturer, ignore.case = TRUE))
print(p_ab)
# (35) Embraer aircraft.
p_emb <- planes %>%
  filter(grepl("embraer", manufacturer, ignore.case = TRUE))
print(p_emb)
# (36) A320-family models.
p_a3 <- planes %>% filter(startsWith(model, "A320"))
print(p_a3)
# (37) Models starting with "A737", as named in the exercise.
# NOTE(review): Boeing 737 variants appear to be coded "737-..." without a
# leading "A"; this probably needs the prefix "737" -- confirm intent.
p_a7 <- planes %>% filter(startsWith(model, "A737"))
print(p_a7)
# (38) Boeing 767 variants.
p_b7 <- planes %>%
  filter(grepl("boeing", manufacturer, ignore.case = TRUE) &
           startsWith(model, "767"))
print(p_b7)
# (39) Airbus A320 variants; stored under its own name so the result of
# exercise (36) in `p_a3` is no longer overwritten.
p_ab_a3 <- planes %>%
  filter(grepl("airbus", manufacturer, ignore.case = TRUE) &
           startsWith(model, "A320"))
print(p_ab_a3)
# (40) Boeing 767 variants with fewer than 50 seats.
p_b7s <- planes %>%
  filter(grepl("boeing", manufacturer, ignore.case = TRUE) &
           startsWith(model, "767") & seats < 50)
print(p_b7s)
# (41) Attach every matching flight to each aircraft, keyed on tail number.
p_ft <- left_join(planes, flights, by = "tailnum")
print(p_ft)
# (42) Monthly means of departure delay, arrival delay, air time and distance.
f_by_month <- group_by(flights, month)
f_mth <- summarise(
  f_by_month,
  avg_dd = mean(dep_delay, na.rm = TRUE),
  avg_ad = mean(arr_delay, na.rm = TRUE),
  avg_at = mean(air_time, na.rm = TRUE),
  avg_d = mean(distance, na.rm = TRUE)
)
print(f_mth)
# Switch to the author's output folder only when it exists. An unconditional
# setwd() on a hard-coded user path aborts the script on any other machine;
# in that case the CSV files are written to the current working directory.
out_dir <- "C:\\Users\\zhang\\Desktop\\2"
if (dir.exists(out_dir)) {
  setwd(out_dir)
} else {
  message("Output directory not found; writing CSV files to ", getwd())
}
library(nycflights13)
library(dplyr)
library(readr)
#' Print the dimensions and structure of a table
#'
#' Writes a "Dimensions:" line followed by the `str()` report to the console.
#'
#' @param df Object to inspect (a data frame in this script).
#' @return `NULL`, invisibly (the value of `str()`); called for its output.
inspect_data <- function(df) {
  header <- paste(c("Dimensions:", dim(df)), collapse = " ")
  cat(header, "\n")
  str(df)
}
# Report dimensions and structure of the four tables in turn; invisible()
# suppresses the list of NULLs that lapply() would otherwise print.
invisible(lapply(list(flights, airlines, planes, airports), inspect_data))
# Open the documentation page for the `flights` dataset.
help("flights")
#' Write a data frame to a CSV file without row names
#'
#' @param df Data frame to export.
#' @param filename Path of the CSV file to create or overwrite.
#' @return The value of `write.csv()`; called for its side effect.
save_csv <- function(df, filename) {
  write.csv(x = df, file = filename, row.names = FALSE)
}
# Export the four package tables to CSV files.
save_csv(flights, "flights_data.csv")
save_csv(airlines, "airlines_data.csv")
save_csv(planes, "planes_data.csv")
save_csv(airports, "airports_data.csv")
# Reload them from disk. read_csv() guesses column types from the first
# 1000 rows by default, so a column that is mostly NA (planes$speed is a
# likely candidate) may be guessed as logical and lose its later values.
# Guessing over every row avoids that; the row counts are taken from the
# in-memory tables before they are replaced.
flights <- read_csv("flights_data.csv", guess_max = nrow(flights))
airlines <- read_csv("airlines_data.csv", guess_max = nrow(airlines))
planes <- read_csv("planes_data.csv", guess_max = nrow(planes))
airports <- read_csv("airports_data.csv", guess_max = nrow(airports))
#' Filter a table by a quoted condition and export the result
#'
#' @param df Data frame to filter.
#' @param condition Quoted expression (e.g. from `quote()`), unquoted with
#'   `!!` inside `filter()` and evaluated against the columns of `df`.
#' @param filename Path of the CSV file that receives the matching rows.
#' @return The filtered data frame.
filter_and_save <- function(df, condition, filename) {
  kept_rows <- filter(df, !!condition)
  save_csv(kept_rows, filename)
  kept_rows
}
# Date-based filters; each result is also written to its own CSV file.
jan1_condition <- quote(month == 1 & day == 1)
jan1_flights <- filter_and_save(flights, jan1_condition, "jan1_flights.csv")
non_jan1_condition <- quote(!(month == 1 & day == 1))
non_jan1_flights <- filter_and_save(
  flights, non_jan1_condition, "non_jan1_flights.csv"
)
mar19_before15_condition <- quote(month == 3 & day == 19 & hour < 15)
mar19_before15 <- filter_and_save(
  flights, mar19_before15_condition, "mar19_before15.csv"
)

# Sorting.
flights_asc <- flights %>% arrange(year, month, day)
save_csv(flights_asc, "flights_asc.csv")
flights_desc <- flights %>% arrange(desc(arr_delay))
save_csv(flights_desc, "flights_desc.csv")

# Column selection, renaming and derived columns.
flights_ymd <- flights %>% select(year, month, day)
save_csv(flights_ymd, "flights_ymd.csv")
flights_other <- flights %>% select(-year, -month, -day)
save_csv(flights_other, "flights_other.csv")
flights_selected <- flights %>%
  select(arr_time, sched_arr_time, tailnum, origin)
save_csv(flights_selected, "flights_selected.csv")
flights_renamed <- flights %>% rename(tail_num = tailnum)
save_csv(flights_renamed, "flights_renamed.csv")
flights_gain <- flights %>% mutate(gain = arr_delay - dep_delay)
save_csv(flights_gain, "flights_gain.csv")

# Mean departure delay with missing values counted as 0. The filled values
# are kept in a separate vector: overwriting flights$dep_delay changed the
# table itself, so the later exports that include dep_delay and the monthly
# averages were computed on zero-filled data.
dep_delay_filled <- replace(flights$dep_delay, is.na(flights$dep_delay), 0)
mean_dep_delay <- mean(dep_delay_filled)
save_csv(data.frame(mean_dep_delay), "mean_dep_delay.csv")

# Random samples. The results are no longer stored under the names
# `sample_n` / `sample_frac`, which masked the dplyr verbs being called.
sample_10 <- flights %>% sample_n(10)
save_csv(sample_10, "sample_10.csv")
sample_1p <- flights %>% sample_frac(0.01)
save_csv(sample_1p, "sample_1p.csv")

# Positional, prefix-based and range-based column selection.
first_column <- flights %>% select(1)
save_csv(first_column, "first_column.csv")
dep_columns <- flights %>% select(starts_with("dep"))
save_csv(dep_columns, "dep_columns.csv")
year_to_day <- flights %>% select(year:day)
save_csv(year_to_day, "year_to_day.csv")

# Left joins of selected flight columns to the three lookup tables.
flights_airlines <- flights %>%
  select(year, month, day, hour, origin, tailnum, carrier) %>%
  left_join(airlines, by = "carrier")
save_csv(flights_airlines, "flights_airlines.csv")
flights_planes <- flights %>%
  select(year, month, day, hour, origin, tailnum, carrier) %>%
  left_join(planes, by = "tailnum")
save_csv(flights_planes, "flights_planes.csv")
flights_airports <- flights %>%
  select(year, month, day, hour, origin, tailnum, carrier) %>%
  left_join(airports, by = c("origin" = "faa"))
save_csv(flights_airports, "flights_airports.csv")
# Per-aircraft record count, mean distance and mean arrival delay, limited
# to aircraft with more than 20 records and a mean distance below 2000.
# mean(..., na.rm = TRUE) returns NaN when every value in a group is NA;
# the exercise asks for missing values to be replaced by 0, which this
# version now does for both averages before exporting.
flights_grouped <- flights %>%
  group_by(tailnum) %>%
  summarise(
    count = n(),
    avg_distance = mean(distance, na.rm = TRUE),
    avg_arrival_delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20, avg_distance < 2000) %>%
  mutate(
    avg_distance = replace(avg_distance, is.na(avg_distance), 0),
    avg_arrival_delay = replace(
      avg_arrival_delay, is.na(avg_arrival_delay), 0
    )
  )
save_csv(flights_grouped, "flights_grouped.csv")
# Delta flights operated by aircraft D942DN.
specific_flight_condition <- quote(carrier == "DL" & tailnum == "D942DN")
specific_flight <- filter_and_save(
  flights, specific_flight_condition, "specific_flight.csv"
)
# Flight 4610 operated by aircraft N0EGMQ. The tail number contains the digit
# zero; the previous literal "NOEGMQ" (letter O) could not match any row.
specific_tail_flight_condition <- quote(tailnum == "N0EGMQ" & flight == 4610)
specific_tail_flight <- filter_and_save(
  flights, specific_tail_flight_condition, "specific_tail_flight.csv"
)
# Distance-based filters.
# NOTE(review): the exercise text says km, while nycflights13 documents
# `distance` in miles; thresholds are kept as written -- confirm the unit.
short_flights_condition <- quote(distance < 80)
short_flights <- filter_and_save(
  flights, short_flights_condition, "short_flights.csv"
)
medium_flights_condition <- quote(distance >= 200 & distance <= 800)
medium_flights <- filter_and_save(
  flights, medium_flights_condition, "medium_flights.csv"
)
# Flights departing from LGA.
lga_flights_condition <- quote(origin == "LGA")
lga_flights <- filter_and_save(flights, lga_flights_condition, "lga_flights.csv")
# Long-haul flights: air_time above 240, i.e. 4 hours in minutes.
long_flights_condition <- quote(air_time > 240)
long_flights <- filter_and_save(
  flights, long_flights_condition, "long_flights.csv"
)
# One specific ExpressJet flight, matched on five columns at once.
ev_flight_condition <- quote(
  carrier == "EV" &
    flight == 6181 &
    tailnum == "N12136" &
    origin == "EWR" &
    dest == "IAD"
)
ev_flight <- filter_and_save(flights, ev_flight_condition, "ev_flight.csv")
# Air time above 4 hours, `hour` above 8 and distance above 100.
complex_filter_condition <- quote(air_time > 240 & hour > 8 & distance > 100)
complex_filter <- filter_and_save(
  flights, complex_filter_condition, "complex_filter.csv"
)
# Filters on the `planes` table. In nycflights13 the manufacturer values are
# upper case (e.g. "BOEING", "AIRBUS INDUSTRIE") and model codes carry a
# variant suffix (e.g. "A320-214", "767-332"), so exact comparisons such as
# manufacturer == "Boeing" or model == "A320" exported empty files.
# Manufacturers are matched case-insensitively and models by prefix.
boeing_condition <- quote(grepl("boeing", manufacturer, ignore.case = TRUE))
boeing_planes <- filter_and_save(planes, boeing_condition, "boeing_planes.csv")
# Covers both "AIRBUS" and "AIRBUS INDUSTRIE".
airbus_condition <- quote(grepl("airbus", manufacturer, ignore.case = TRUE))
airbus_planes <- filter_and_save(planes, airbus_condition, "airbus_planes.csv")
embraer_condition <- quote(grepl("embraer", manufacturer, ignore.case = TRUE))
embraer_planes <- filter_and_save(
  planes, embraer_condition, "embraer_planes.csv"
)
a320_condition <- quote(startsWith(model, "A320"))
a320_planes <- filter_and_save(planes, a320_condition, "a320_planes.csv")
# NOTE(review): Boeing 737 variants appear to be coded "737-..." without a
# leading "A"; this probably needs the prefix "737" -- confirm intent.
a737_condition <- quote(startsWith(model, "A737"))
a737_planes <- filter_and_save(planes, a737_condition, "a737_planes.csv")
boeing_767_condition <- quote(
  grepl("boeing", manufacturer, ignore.case = TRUE) &
    startsWith(model, "767")
)
boeing_767 <- filter_and_save(planes, boeing_767_condition, "boeing_767.csv")
airbus_a320_condition <- quote(
  grepl("airbus", manufacturer, ignore.case = TRUE) &
    startsWith(model, "A320")
)
airbus_a320 <- filter_and_save(planes, airbus_a320_condition, "airbus_a320.csv")
boeing_767_seats_condition <- quote(
  grepl("boeing", manufacturer, ignore.case = TRUE) &
    startsWith(model, "767") &
    seats < 50
)
boeing_767_seats <- filter_and_save(
  planes, boeing_767_seats_condition, "boeing_767_seats.csv"
)
# Attach every matching flight to each aircraft, keyed on tail number.
planes_flights <- left_join(planes, flights, by = "tailnum")
save_csv(planes_flights, "planes_flights.csv")
# Monthly means of departure delay, arrival delay, air time and distance.
flights_by_month <- group_by(flights, month)
monthly_stats <- summarise(
  flights_by_month,
  avg_dep_delay = mean(dep_delay, na.rm = TRUE),
  avg_arr_delay = mean(arr_delay, na.rm = TRUE),
  avg_air_time = mean(air_time, na.rm = TRUE),
  avg_distance = mean(distance, na.rm = TRUE)
)
save_csv(monthly_stats, "monthly_stats.csv")
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值