####-------- 6.1 Descriptive statistics ----------#######
############## (6.1-1) Descriptive statistics ##############
# Walk through the basic descriptive statistics using R's built-in
# `airquality` data set. Every Ozone summary passes na.rm = TRUE so that
# missing readings are dropped before the statistic is computed.
head(airquality, n = 6L)
# Central tendency
with(airquality, mean(Ozone, na.rm = TRUE))
# Trimmed arithmetic mean: trim = 0.1 discards 10% of the sorted
# observations at each end before averaging.
with(airquality, mean(Ozone, trim = 0.1, na.rm = TRUE))
with(airquality, median(Ozone, na.rm = TRUE)) # median
# Dispersion
with(airquality, sd(Ozone, na.rm = TRUE)) # standard deviation
with(airquality, var(Ozone, na.rm = TRUE)) # variance
# Skewness and kurtosis come from the `moments` add-on package
# (install it once with install.packages("moments")).
library(moments)
with(airquality, skewness(Ozone, na.rm = TRUE)) # skewness
with(airquality, kurtosis(Ozone, na.rm = TRUE)) # kurtosis
# Percentiles: every 10% step from 0% to 100%
with(airquality, quantile(Ozone, probs = seq(0, 1, by = 0.1), na.rm = TRUE))
# Counts: total number of records, then records per month
length(airquality[["Month"]])
table(airquality[["Month"]])
############## (6.1-2) Several statistics at once ##############
# The `diamonds` data set becomes available once ggplot2 is attached.
library(ggplot2)
head(diamonds, n = 6L)
# Base R: one summary per column
summary(object = diamonds)
# psych::describe(): a table of descriptive statistics per column
library(psych)
describe(x = diamonds)
####-------- 6.2 Correlation analysis ----------#######
# Sample data: community ID, green-space rate and happiness score
community_data <- data.frame(
  community_id = seq_len(15),
  # green-space rate (%)
  green_rate = c(25, 30, 22, 35, 18, 40, 28, 32, 20, 38, 26, 33, 19, 36, 24),
  # resident happiness score
  happiness = c(
    6.2, 7.5, 5.8, 8.1, 5.2, 8.5, 6.9, 7.8, 5.5, 8.3, 6.5, 7.6, 5.3, 8.0, 6.1
  )
)
head(community_data)
# Scatter plot to inspect the relationship visually
with(
  community_data,
  plot(
    green_rate, happiness,
    main = "绿化率与幸福感的散点图",
    xlab = "绿化率(%)",
    ylab = "居民幸福感评分",
    pch = 16,
    col = "forestgreen"
  )
)
# Overlay the least-squares trend line
abline(lm(happiness ~ green_rate, data = community_data), lwd = 2, col = "red")
# Pearson correlation coefficient between the two variables
cor_result <- with(
  community_data,
  cor(green_rate, happiness, method = "pearson")
)
print(cor_result)
####-------- 6.3 t-test ----------#######
# Sample data: plaza design (A/B) and visitor stay time
square_data <- data.frame(
  # A = hard paving, B = ecological greening; 12 observations per design
  type = rep(c("A", "B"), each = 12),
  stay_time = c(
    18, 22, 15, 20, 17, 21, 16, 19, 23, 14, 20, 18, # stay times, design A
    25, 30, 28, 32, 26, 29, 31, 27, 24, 33, 28, 30 # stay times, design B
  )
)
head(square_data, n = 6L) # first six rows
# Independent two-sample t-test. The formula reads response ~ grouping
# variable; var.equal = TRUE treats the two group variances as equal.
t_result <- t.test(stay_time ~ type, var.equal = TRUE, data = square_data)
print(t_result)
# Sample data: shop ID with revenue before and after the renovation
business_data <- data.frame(
  shop_id = seq_len(10),
  # revenue before the renovation
  before = c(8.5, 7.2, 9.1, 6.8, 7.5, 8.2, 6.5, 9.3, 7.8, 8.0),
  # revenue after the renovation
  after = c(12.3, 10.5, 13.2, 9.8, 11.0, 12.8, 9.2, 14.5, 11.5, 12.0)
)
head(business_data)
# Paired t-test: paired = TRUE compares the before/after values shop by shop
t_paired <- t.test(
  x = business_data$before,
  y = business_data$after,
  paired = TRUE
)
print(t_paired)
####-------- 6.4 Analysis of variance ----------#######
# Sample data: district ID, building-density class and per-capita area of
# children's activity space
child_space <- data.frame(
  district_id = seq_len(18),
  # three density classes, six districts in each
  density_type = rep(c("低密度", "中密度", "高密度"), times = c(6, 6, 6)),
  area = c(
    3.2, 3.5, 3.1, 3.6, 3.3, 3.4, # low-density districts
    2.5, 2.3, 2.7, 2.4, 2.6, 2.5, # medium-density districts
    1.8, 1.6, 1.9, 1.7, 1.5, 1.8 # high-density districts
  )
)
head(child_space)
# One-way ANOVA model: response ~ grouping variable
anova_model <- aov(formula = area ~ density_type, data = child_space)
summary(anova_model) # prints the ANOVA table
# Tukey HSD post-hoc test: pairwise comparisons between the density classes
tukey_result <- TukeyHSD(anova_model)
print(tukey_result)
####-------- 6.5 Regression analysis ----------#######
############## (6.5-1) Simple linear regression ##############
# 1. Simulated data (in practice, read a real file with read.csv() instead)
community_data <- data.frame(
  community_id = seq_len(15),
  # green-space rate (%)
  green_rate = c(25, 30, 22, 35, 18, 40, 28, 32, 20, 38, 26, 33, 19, 36, 24),
  # resident happiness score
  happiness = c(
    6.2, 7.5, 5.8, 8.1, 5.2, 8.5, 6.9, 7.8, 5.5, 8.3, 6.5, 7.6, 5.3, 8.0, 6.1
  )
)
# 2. Visual check: scatter plot to look for a linear trend
library(ggplot2)
ggplot(data = community_data, mapping = aes(green_rate, happiness)) +
  geom_point(size = 3, color = "steelblue") + # the scatter points
  labs(
    title = "绿化率与幸福感的散点图",
    x = "绿化率(%)",
    y = "居民幸福感评分"
  ) +
  theme_minimal()
# Fit the model with lm() (response ~ predictor)
model_simple <- lm(formula = happiness ~ green_rate, data = community_data)
# Coefficients, significance tests and goodness of fit
summary(model_simple)
############## (6.5-2) Multiple linear regression ##############
# 1. Simulated data
# Fix the RNG seed first: sample(), runif() and rnorm() below would otherwise
# generate a different data set (and different estimates) on every run.
set.seed(789)
commercial_data <- data.frame(
  floor = sample(5:30, 30, replace = TRUE), # number of storeys: 5-30
  glazing = runif(30, min = 30, max = 70), # window-to-wall ratio: 30%-70%
  insulation = runif(30, min = 0.3, max = 0.9) # insulation coefficient: 0.3-0.9
)
# Energy intensity = baseline + storey effect + glazing effect
#                    - insulation effect + random noise
commercial_data$energy_intensity <- 300 +
  5 * commercial_data$floor +
  2 * commercial_data$glazing -
  150 * commercial_data$insulation +
  rnorm(30, mean = 0, sd = 20)
# 2. Fit the multiple linear regression model
model_multi <- lm(
  energy_intensity ~ floor + glazing + insulation,
  data = commercial_data
)
# Inspect the fit
summary(model_multi) # coefficient table and overall fit
coefficients(model_multi) # estimated coefficients
confint(model_multi) # confidence intervals for the coefficients
fitted(model_multi) # fitted values
residuals(model_multi) # residuals
anova(model_multi) # sequential ANOVA table
plot(model_multi) # diagnostic plots
predict(model_multi) # predictions on the training data
# Multicollinearity check with variance inflation factors from `car`.
# The result is stored as `vif_values` so that it does not shadow the
# car::vif() function itself.
library(car)
vif_values <- vif(model_multi)
vif_values
# Which predictors have a VIF above 10?
vif_values > 10
# Which predictors have a square-root VIF above 2?
sqrt(vif_values) > 2
####-------- 6.6 Cluster analysis ----------#######
############## (6.6-1) Hierarchical clustering ##############
# Simulate attribute data for 10 land parcels
set.seed(123) # fixed seed so the simulated values are reproducible
plot_data <- data.frame(
  面积_公顷 = runif(10, min = 0.5, max = 5), # area: 0.5-5 ha
  # floor-area ratio: 1.2-5 (residential is usually 1.2-3, commercial may
  # be higher)
  容积率 = runif(10, min = 1.2, max = 5),
  绿化率 = runif(10, min = 0.2, max = 0.4), # green-space rate: 20%-40%
  距地铁站_km = runif(10, min = 0.3, max = 3), # distance to metro: 0.3-3 km
  # parcel IDs become the row names, leaving only numeric columns
  row.names = paste0("地块", 1:10)
)
head(plot_data) # first six rows
# Standardise every column to mean 0 and standard deviation 1 (z-scores)
plot_scaled <- scale(plot_data)
# Pairwise Euclidean distances between the standardised parcels
dist_matrix <- dist(plot_scaled, method = "euclidean")
# Agglomerative clustering with average linkage (clusters are merged by
# their mean pairwise distance)
hc_result <- hclust(dist_matrix, method = "average")
# Dendrogram
plot(
  hc_result,
  main = "地块功能聚类树状图",
  xlab = "地块",
  ylab = "距离",
  cex = 0.8 # label size
)
# Cut the dendrogram into 3 clusters and box each one in its own colour
rect.hclust(hc_result, k = 3, border = 2:4)
############## (6.6-2) K-means clustering ##############
# Simulate facility-demand scores (integers 1-10) for 15 communities
set.seed(456)
community_data <- data.frame(
  社区编号 = paste0("社区", 1:15), # community ID
  公园需求 = sample(1:10, 15, replace = TRUE), # demand for parks
  学校需求 = sample(1:10, 15, replace = TRUE), # demand for schools
  医院需求 = sample(1:10, 15, replace = TRUE), # demand for hospitals
  菜市场需求 = sample(1:10, 15, replace = TRUE) # demand for food markets
)
# Move the IDs into the row names so that only numeric columns remain
rownames(community_data) <- community_data[["社区编号"]]
community_data[["社区编号"]] <- NULL
head(community_data)
# Optional elbow-method check (kept commented out): total within-cluster
# sum of squares for K = 1..6.
# wss <- sapply(1:6, function(k) {
#   kmeans(community_data, centers = k, nstart = 20)$tot.withinss
# })
#
# # Elbow plot
# plot(1:6, wss, type = "b",
#      xlab = "聚类数K",
#      ylab = "总类内平方和",
#      main = "肘部法确定最佳K值")
# # K = 3 with 20 random starts
# km_result <- kmeans(
#   x = community_data,
#   centers = 3,
#   nstart = 20 # several initialisations, the best one is kept
# )
# install.packages("NbClust")
library(NbClust)
# Let NbClust evaluate candidate cluster counts from 2 to 8 for k-means on
# Euclidean distances.
NbClust(
  community_data,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 8,
  method = "kmeans"
)
# Final k-means fit: 3 clusters, up to 100 iterations, 30 random starts
km_result <- kmeans(community_data, centers = 3, iter.max = 100, nstart = 30)
# Inspect the clustering
km_result$cluster # cluster assigned to each community
km_result$centers # cluster centres (mean demand per facility)
# install.packages("ggfortify")
library(ggfortify)
# Plot the clusters with community labels and cluster frames. The argument
# is spelled `label.size`; the former `lable.size` typo was not recognised,
# so the requested label size never took effect.
autoplot(
  km_result,
  data = community_data,
  label = TRUE,
  label.size = 2,
  frame = TRUE
)
####-------- 6.7 Principal component analysis ----------#######
# Required packages
library(ggplot2)
library(dplyr)
library(factoextra)
# Planning-indicator data (in practice, load it from a file instead, e.g.
# read.csv("居住区数据.csv"))
residential_data <- data.frame(
  居住区编号 = paste0("R", seq_len(8)), # residential-area ID
  容积率FAR = c(2.0, 2.0, 2.5, 1.6, 2.4, 2.7, 1.9, 2.4), # floor-area ratio
  绿地率GLR = c(45, 35, 30, 45, 38, 28, 40, 32), # green-space rate
  日照达标率SR = c(80, 92, 80, 95, 85, 78, 90, 82), # sunlight compliance rate
  配套设施数量FS = c(4, 9, 5, 9, 7, 4, 8, 5), # number of supporting facilities
  步行可达性WA = c(7.5, 7.5, 7.2, 9.0, 8.1, 6.8, 8.3, 7.5), # walkability
  噪声控制NC = c(82, 88, 79, 90, 75, 76, 86, 80) # noise control
)
# Separate the ID column from the indicator columns
group_info <- residential_data$居住区编号 # area IDs, used for plotting
index_data <- residential_data[-1] # the six indicators without the ID
# Run the PCA. Scaling is essential because the indicators use very
# different units (e.g. a percentage versus a count of facilities).
pca_livable <- prcomp(index_data, scale. = TRUE)
# Key results
## (1) Variance explained (author's rule of thumb: keep components until
##     the cumulative proportion reaches 85% or more)
summary(pca_livable)
## (2) Loading matrix
pca_livable$rotation
# Scree plot
fviz_eig(pca_livable, main = "规划决策PCA碎石图", addlabels = TRUE)
# Scores on the first two principal components, tagged with the area ID
pc_scores <- mutate(
  as.data.frame(pca_livable$x[, c("PC1", "PC2")]),
  居住区 = group_info
)
# Score plot; each axis label carries that component's share of variance
ggplot(pc_scores, aes(x = PC1, y = PC2, color = 居住区, label = 居住区)) +
  geom_point(size = 4) + # large points for report figures
  geom_text(vjust = -1) + # labels above the points
  labs(
    title = "居住区宜居性PCA得分图",
    x = paste0(
      "PC1(生态舒适度,",
      round(
        summary(pca_livable)$importance["Proportion of Variance", "PC1"] * 100,
        1
      ),
      "%)"
    ),
    y = paste0(
      "PC2(生活便利度,",
      round(
        summary(pca_livable)$importance["Proportion of Variance", "PC2"] * 100,
        1
      ),
      "%)"
    )
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title
# One-time setup: install the add-on packages used by this script and the
# accompanying exercises. On a fresh machine run this block before the
# library() calls. Only packages that are not yet installed are
# downloaded, so re-running the script does not reinstall everything.
# ggplot2 and factoextra are included because the script attaches them.
required_pkgs <- c(
  "ggplot2", "openxlsx", "readxl", "foreign", "haven", "reshape2",
  "magrittr", "dplyr", "tidyr", "psych", "car", "moments", "NbClust",
  "ggfortify", "factoextra"
)
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# Exercise (this text originally followed the last install.packages() call
# as bare prose, which made the file impossible to parse): using the code
# and packages above, analyse in RStudio how building density and
# green-space rate relate to the urban heat-island effect in Tianjin and
# Beijing. The data contain, for each of the two cities, the monthly urban
# and suburban temperatures together with building density and green-space
# rate.
# A stray web-page label ("Latest release") that followed the exercise text
# was bare text as well and is kept here only as this note.