# Attach dplyr first, then read the two input tables from the working
# directory.
library(dplyr)
t1 <- read.csv(file = "elder1.csv")
t2 <- read.csv(file = "elder2.csv")

# Merge on ID, keeping only IDs that occur in both tables (all = FALSE),
# and write the merged table to disk.
tt <- merge(x = t1, y = t2, by = "ID", all = FALSE)
write.csv(x = tt, file = "ttnew.csv")
# Classify each subject's body-mass index into four coded bands.
# BMI = weight / height^2; the * 10000 factor implies height is recorded in
# centimetres (presumably with weight in kg -- confirm against the data).
#
# findInterval() uses left-closed bands, so the codes are:
#   0: BMI < 18.5
#   1: 18.5 <= BMI < 24
#   2: 24 <= BMI < 28
#   3: BMI >= 28
#
# The classification is done in a single pass on the raw BMI. Recoding the
# column in place band by band (and then deriving bmi2 from the already
# recoded column) put every subject into level 0 of bmi2 and left a BMI of
# exactly 28 uncoded. Missing weight/height yields NA.
#
# As before, tt$bmi holds the numeric band code (not the raw BMI); later code
# builds a factor from these codes.
tt$bmi <- as.numeric(
  findInterval(tt$weight / (tt$height^2) * 10000, c(18.5, 24, 28))
)

# Factor version of the same codes; all four levels are always declared so
# that empty bands still show up in tables.
tt$bmi2 <- factor(tt$bmi, levels = c(0, 1, 2, 3))

print(levels(tt$bmi2))
print(table(tt$bmi2))
# Code blood pressure as 2 when SBP >= 140 or DBP >= 90, otherwise 1.
tt$pressure <- with(tt, ifelse(SBP >= 140 | DBP >= 90, 2, 1))
print(table(tt$pressure))

# Overwrite the sugar column with a two-value code: 2 when above 7, else 1.
tt$sugar <- with(tt, ifelse(sugar > 7, 2, 1))
print(table(tt$sugar))
# Show the distribution of the original income codes.
print(table(tt$income))

# Collapse income into three bands stored in income1:
#   0: income < 3
#   1: 3 <= income < 6
#   2: income >= 6
# The column is created up front with one NA per row so that each banded
# assignment below fills an existing full-length column (growing it from
# nothing can produce a vector shorter than the table). Rows with a missing
# income stay NA.
tt$income1 <- NA_real_
tt$income1[tt$income < 3] <- 0
tt$income1[tt$income >= 3 & tt$income < 6] <- 1
# Top band goes into income1 as well (it was previously written to a
# misspelled column, so code 2 never appeared in income1).
tt$income1[tt$income >= 6] <- 2
print(table(tt$income1))
# Show the column types before conversion.
str(tt)

# Convert the coded columns to factors in one step. bmi1 is a new factor
# column built from bmi; the explicit level vectors make code 1 (bmi1) and
# code 2 (marriage) the first level.
tt <- tt %>%
  mutate(
    sex = factor(sex),
    bmi1 = factor(bmi, levels = c(1, 0, 2, 3)),
    pressure = factor(pressure),
    marriage = factor(marriage, levels = c(2, 1, 3, 4, 5))
  )

# Show the column types after conversion.
str(tt)
# Joins: combine t1 and t2 on the shared ID column.
# Note that tt is rebuilt here from the raw inputs, replacing the table
# derived above.
tt <- t1 %>% inner_join(t2, by = "ID")   # IDs present in both tables
tt1 <- t1 %>% full_join(t2, by = "ID")   # IDs present in either table
tt2 <- t1 %>% left_join(t2, by = "ID")   # every row of t1
tt3 <- t1 %>% right_join(t2, by = "ID")  # every row of t2
# filter: keep the rows that satisfy a condition.
t11 <- tt %>% filter(SBP >= 140)
# All three conditions must hold.
t12 <- tt %>% filter(SBP >= 140 & DBP >= 90 & sugar > 7)
# Either condition is enough.
t13 <- tt %>% filter(SBP >= 140 | DBP >= 90)
t14 <- tt %>% filter(sex == 1)
# arrange: sort rows.
t15 <- tt %>% arrange(SBP)             # ascending SBP
t16 <- tt %>% arrange(desc(DBP))       # descending DBP
t17 <- tt %>% arrange(SBP, DBP)        # SBP, ties broken by DBP
t18 <- tt %>% arrange(desc(DBP), SBP)  # descending DBP, ties broken by SBP
# select: choose and reorder columns.
# Drop both blood-pressure columns.
t19 <- tt %>% select(-c(DBP, SBP))
# Move DBP and HDL to the front, keep the rest, and drop ID.
t20 <- tt %>% select(DBP, HDL, everything(), -ID)
# mutate: add or transform columns.
t21 <- tt %>% mutate(x1 = weight * 2, x2 = height / 100)

# Distribution of the original income codes.
print(table(tt$income))

# Collapse the eight income codes pairwise into four
# (1-2 -> 1, 3-4 -> 2, 5-6 -> 3, 7-8 -> 4), then store the result as a factor.
t22 <- tt %>%
  mutate(
    income = recode(
      income,
      "1" = 1, "2" = 1, "3" = 2, "4" = 2,
      "5" = 3, "6" = 3, "7" = 4, "8" = 4
    )
  )
t22[["income"]] <- factor(t22[["income"]])

print(table(t22$income))
str(t22)
# summarise: collapse a table to summary statistics.
# Mean and median of DBP in the joined table (NA values are not removed).
t23 <- tt %>% summarise(DBP_mean = mean(DBP), DBP_median = median(DBP))

# Row count, mean and median (ignoring NA) for DBP and SBP in t1.
t24 <- summarise(
  t1,
  across(
    c(DBP, SBP),
    list(
      count = function(x) n(),
      mean = function(x) mean(x, na.rm = TRUE),
      median = function(x) median(x, na.rm = TRUE)
    )
  )
)

# The same three statistics for every numeric column of t1.
t25 <- summarise(
  t1,
  across(
    where(is.numeric),
    list(
      count = function(x) n(),
      mean = function(x) mean(x, na.rm = TRUE),
      median = function(x) median(x, na.rm = TRUE)
    )
  )
)
# group_by: per-group summaries, grouped by sex.
tt01 <- tt %>% group_by(sex)

# Number of rows in each group.
tt02 <- tt01 %>% summarise(count = n())

# Mean, median and non-missing count of DBP and SBP within each group.
tt03 <- summarise(
  tt01,
  across(
    c(DBP, SBP),
    list(
      mean = function(x) mean(x, na.rm = TRUE),
      median = function(x) median(x, na.rm = TRUE),
      count = function(x) sum(!is.na(x))
    )
  )
)