R语言实现散点图【附代码】

1.下载软件包:网上一大堆下载链接,也可以公众号搜索

2.效果图先附上

3,所需数据:验证数据为csv表格-UTM8编码格式!!!r语言不能直接读取excel表格,需将EXCEL先用记事本打开--再保存为csv表格,编码为UTM

4,代码执行:

# 安装和加载必要的包
if (!require(ggplot2)) install.packages("ggplot2")
if (!require(dplyr)) install.packages("dplyr")
if (!require(data.table)) install.packages("data.table")

library(ggplot2)
library(dplyr)
library(data.table)

# 设置系统编码
Sys.setlocale("LC_ALL", "Chinese")

# 让用户输入文件路径
cat("请输入CSV文件的完整路径(或按回车使用文件选择对话框):\n")
file_path <- readline()

# 如果用户没有输入路径,则使用文件选择对话框
if (file_path == "") {
  cat("请在弹出窗口中选择文件...\n")
  file_path <- file.choose()
}

# 在tryCatch中使用file_path替换硬编码的路径
tryCatch({
  # 尝试不同的编码方式
  encodings <- c("UTF-8", "GBK", "GB18030", "BIG5")
  data <- NULL
  
  for (enc in encodings) {
    tryCatch({
      cat("尝试使用", enc, "编码读取文件...\n")
      temp_data <- read.csv(file_path,
                           fileEncoding = enc,
                           check.names = FALSE,
                           stringsAsFactors = FALSE)
      if (!is.null(temp_data) && nrow(temp_data) > 0 && ncol(temp_data) > 0) {
        data <- temp_data
        cat("成功使用", enc, "编码读取文件\n")
        break
      }
    }, error = function(e) {
      cat("使用", enc, "编码失败:", conditionMessage(e), "\n")
    })
  }
  
  # 读取数据后,显示列名
  cat("\n读取到的列名:\n")
  print(names(data))
  
  # 只检查必需的三列
  required_cols <- c("站点观测", "VPM10km", "VPM500m")
  
  # 标准化列名(移除特殊字符并统一格式)
  standardize_colname <- function(name) {
    name <- gsub("[^a-zA-Z0-9\u4e00-\u9fa5]", "", name)  # 只保留字母、数字和中文
    return(tolower(name))        # 转换为小写
  }
  
  # 创建列名映射
  column_mapping <- list(
    "站点观测" = c("站点观测", "站点_观测", "观测站点", "观测"),
    "VPM10km" = c("VPM10km", "VPM-10km", "VPM_10km", "vpm10km", "VPM 10km"),
    "VPM500m" = c("VPM500m", "VPM-500m", "VPM_500m", "vpm500m", "VPM 500m")
  )
  
  # 标准化当前列名
  current_cols <- names(data)
  standardized_current <- sapply(current_cols, standardize_colname)
  
  # 检查并重命名列
  for (expected_col in required_cols) {
    variants <- column_mapping[[expected_col]]
    standardized_variants <- sapply(variants, standardize_colname)
    
    # 查找匹配的列名
    found <- FALSE
    for (i in seq_along(current_cols)) {
      if (standardized_current[i] %in% standardized_variants) {
        if (current_cols[i] != expected_col) {
          data[[expected_col]] <- data[[current_cols[i]]]
          data[[current_cols[i]]] <- NULL
        }
        found <- TRUE
        break
      }
    }
    
    if (!found) {
      stop(sprintf("找不到必需的列: %s", expected_col))
    }
  }
  
  # 验证最终的列名
  cat("\n最终的列名:\n")
  print(names(data))
  
  # 检查并转换所有数据列为数值型
  for (col in names(data)) {
    data[[col]] <- as.numeric(as.character(data[[col]]))
    if (all(is.na(data[[col]]))) {
      stop(sprintf("列 '%s' 包含无效数据", col))
    }
  }
  
  # 显示数据基本信息
  cat("\n数据维度:", dim(data), "\n")
  cat("列名:", names(data), "\n")
  cat("\n数据前几行:\n")
  print(head(data))
  
  # 读取数据后,先检查数据的完整性
  cat("\n数据总行数:", nrow(data), "\n")
  
  # 检查每列的数据范围和异常值
  for (col in names(data)) {
    cat("\n", col, "的统计信息:\n")
    cat("最小值:", min(data[[col]], na.rm = TRUE), "\n")
    cat("最大值:", max(data[[col]], na.rm = TRUE), "\n")
    cat("平均值:", mean(data[[col]], na.rm = TRUE), "\n")
    cat("中位数:", median(data[[col]], na.rm = TRUE), "\n")
    cat("NA值数量:", sum(is.na(data[[col]])), "\n")
    
    # 检查异常值(超过3个标准差的值)
    mean_val <- mean(data[[col]], na.rm = TRUE)
    sd_val <- sd(data[[col]], na.rm = TRUE)
    outliers <- data[[col]][abs(data[[col]] - mean_val) > 3 * sd_val]
    if(length(outliers) > 0) {
      cat("可能的异常值:", outliers, "\n")
    }
  }
  
  # 检查x和y轴的数据对应关系
  cat("\n检查站点观测和VPM10km的对应关系:\n")
  correlation <- cor(data$站点观测, data$VPM10km, use = "complete.obs")
  cat("相关系数:", correlation, "\n")
  
  # 输出散点图矩阵
  pairs(data[, c("站点观测", "VPM10km", "VPM500m")])
  
  # 设置多图布局
  par(mfrow = c(1, 2))  # 1行2列的布局
  
  # 分别创建两个对比图
  # 1. 站点观测 vs VPM10km
  title_1 <- "(A)VPM10km vs 站点观测"
  plot_comparison_base(data$站点观测, data$VPM10km, title_1)
  
  # 2. 站点观测 vs VPM500m
  title_2 <- "(B)VPM500m vs 站点观测"
  plot_comparison_base(data$站点观测, data$VPM500m, title_2)
  
  # 重置图形参数
  par(mfrow = c(1, 1))
  
  # 显示这对数据的统计指标
  cat("\nVPM500m vs 站点观测的统计指标:\n")
  cat("R²:", round(cor(data$VPM500m, data$站点观测, use = "complete.obs")^2, 3), "\n")
  cat("RMSE:", round(RMSE(data$VPM500m, data$站点观测), 3), "\n")
  cat("NSE:", round(NSE(data$VPM500m, data$站点观测), 3), "\n")
  cat("Bias:", round(Bias(data$VPM500m, data$站点观测), 3), "\n")
  
}, error = function(e) {
  cat("错误信息:", conditionMessage(e), "\n")
  cat("\n请检查:\n")
  cat("1. 文件是否存在\n")
  cat("2. 文件是否为标准CSV格式\n")
  cat("3. 文件是否包含所需的所有列\n")
  cat("4. 数据是否为有效的数值\n")
  cat("5. 文件编码是否正确\n")
})

NSE <- function(Qsim, Qobs){
  # 创建数据框而不是data.table
  df <- data.frame(Qsim = Qsim, Qobs = Qobs)
  df <- na.omit(df)
  
  # 如果数据为空,返回NA
  if(nrow(df) == 0) return(NA)
  
  Qsim <- df$Qsim
  Qobs <- df$Qobs
  
  Qobs_mean <- mean(Qobs)
  Emod <- sum((Qsim - Qobs)^2)
  Eref <- sum((Qobs - Qobs_mean)^2)
  if (Emod == 0 & Eref == 0) {
    NSE <- 0
  } else {
    NSE <- (1 - Emod / Eref)
  }
  NSE
}

RMSE <- function(Qsim, Qobs){
  df <- data.frame(Qsim = Qsim, Qobs = Qobs)
  df <- na.omit(df)
  
  if(nrow(df) == 0) return(NA)
  
  Qsim <- df$Qsim
  Qobs <- df$Qobs
  
  sqrt(mean((Qsim - Qobs)^2))
}

Bias <- function(Qsim, Qobs){
  df <- data.frame(Qsim = Qsim, Qobs = Qobs)
  df <- na.omit(df)
  
  if(nrow(df) == 0) return(NA)
  
  Qsim <- df$Qsim
  Qobs <- df$Qobs
  
  sumQobs <- sum(Qobs)
  sumQsim <- sum(Qsim)
  if (sumQobs == 0) {
    BIAS <- 0
  } else {
    BIAS <- (sumQsim - sumQobs) / sumQobs
  }
  BIAS
}

# 美化后的对比图函数
plot_comparison_base <- function(sim_data, obs_data, title) {
  # 创建数据框
  plot_data <- data.frame(sim = sim_data, obs = obs_data)
  plot_data <- na.omit(plot_data)
  
  # 输出用于拟合的数据点数量
  cat("\n用于", title, "的有效数据点数量:", nrow(plot_data), "\n")
  
  # 计算统计指标
  r2 <- round(cor(plot_data$sim, plot_data$obs)^2, 3)
  rmse <- round(sqrt(mean((plot_data$sim - plot_data$obs)^2)), 3)
  lm_result <- lm(obs ~ sim, data = plot_data)
  slope <- round(coef(lm_result)[2], 3)
  intercept <- round(coef(lm_result)[1], 3)
  
  # 计算绘图范围
  max_val <- max(c(plot_data$sim, plot_data$obs))
  plot_range <- c(0, max_val * 1.1)
  
  # 设置绘图参数
  par(mar = c(5, 5, 4, 2))  # 增加边距
  par(mgp = c(3.5, 1, 0))   # 调整轴标签位置
  
  # 绘制散点图
  plot(plot_data$sim, plot_data$obs,
       xlim = plot_range, ylim = plot_range,
       xlab = "站点观测VS VPM-10KM",
       ylab = "(gC m⁻² 5day⁻¹)",
       main = title,
       pch = 21,              # 圆形带边框的点
       col = "black",         # 点的边框颜色
       bg = rgb(0, 0, 1, 0.5), # 半透明蓝色填充
       cex = 1.2,             # 点的大小
       cex.lab = 1.2,         # 轴标签字体大小
       cex.axis = 1.1,        # 轴刻度字体大小
       cex.main = 1.3,        # 标题字体大小
       font.lab = 2,          # 轴标签字体加粗
       font.main = 2)         # 标题字体加粗
  
  # 添加网格线
  grid(col = "gray90", lty = "dotted")
  
  # 添加1:1线
  abline(0, 1, lty = 2, col = "gray50", lwd = 2)
  
  # 添加拟合线
  abline(lm_result, col = "red", lwd = 2)
  
  # 添加统计信息
  legend_text <- c(
    sprintf("R² = %.3f", r2),
    sprintf("RMSE = %.3f", rmse),
    sprintf("slope = %.3f", slope),
    sprintf("intercept = %.3f", intercept)
  )
  
  # 添加半透明背景的图例框
  legend("topleft",
         legend = legend_text,
         bty = "n",           # 移除边框
         bg = rgb(1, 1, 1, 0.8), # 半透明白色背景
         box.col = "gray70",  # 边框颜色
         cex = 1.1,           # 文字大小
         text.font = 1,       # 文字字体
         inset = 0.02)        # 与边框的距离
  
  # 显示统计指标
  cat("\n", title, "的统计指标:\n")
  cat("R²:", r2, "\n")
  cat("RMSE:", rmse, "\n")
  cat("NSE:", round(NSE(plot_data$sim, plot_data$obs), 3), "\n")
  cat("Bias:", round(Bias(plot_data$sim, plot_data$obs), 3), "\n")
}

5.输入代码后选定文件即可执行啦

多数据对比:

# 安装和加载必要的包
if (!require(ggplot2)) install.packages("ggplot2")
if (!require(dplyr)) install.packages("dplyr")
if (!require(data.table)) install.packages("data.table")

library(ggplot2)
library(dplyr)
library(data.table)

# 设置系统编码
Sys.setlocale("LC_ALL", "Chinese")

# 让用户输入文件路径
cat("请输入CSV文件的完整路径(或按回车使用文件选择对话框):\n")
file_path <- readline()

# 如果用户没有输入路径,则使用文件选择对话框
if (file_path == "") {
  cat("请在弹出窗口中选择文件...\n")
  file_path <- file.choose()
}

# 在tryCatch中使用file_path替换硬编码的路径
tryCatch({
  # 尝试不同的编码方式
  encodings <- c("UTF-8", "GBK", "GB18030", "BIG5")
  data <- NULL
  
  for (enc in encodings) {
    tryCatch({
      cat("尝试使用", enc, "编码读取文件...\n")
      temp_data <- read.csv(file_path,
                           fileEncoding = enc,
                           check.names = FALSE,
                           stringsAsFactors = FALSE)
      if (!is.null(temp_data) && nrow(temp_data) > 0 && ncol(temp_data) > 0) {
        data <- temp_data
        cat("成功使用", enc, "编码读取文件\n")
        break
      }
    }, error = function(e) {
      cat("使用", enc, "编码失败:", conditionMessage(e), "\n")
    })
  }
  
  # 读取数据后,显示列名
  cat("\n读取到的列名:\n")
  print(names(data))
  
  # 只检查必需的三列
  required_cols <- c("站点观测", "VPM10km", "VPM500m")
  
  # 标准化列名(移除特殊字符并统一格式)
  standardize_colname <- function(name) {
    name <- gsub("[^a-zA-Z0-9\u4e00-\u9fa5]", "", name)  # 只保留字母、数字和中文
    return(tolower(name))        # 转换为小写
  }
  
  # 创建列名映射
  column_mapping <- list(
    "站点观测" = c("站点观测", "站点_观测", "观测站点", "观测"),
    "VPM10km" = c("VPM10km", "VPM-10km", "VPM_10km", "vpm10km", "VPM 10km"),
    "VPM500m" = c("VPM500m", "VPM-500m", "VPM_500m", "vpm500m", "VPM 500m")
  )
  
  # 标准化当前列名
  current_cols <- names(data)
  standardized_current <- sapply(current_cols, standardize_colname)
  
  # 检查并重命名列
  for (expected_col in required_cols) {
    variants <- column_mapping[[expected_col]]
    standardized_variants <- sapply(variants, standardize_colname)
    
    # 查找匹配的列名
    found <- FALSE
    for (i in seq_along(current_cols)) {
      if (standardized_current[i] %in% standardized_variants) {
        if (current_cols[i] != expected_col) {
          data[[expected_col]] <- data[[current_cols[i]]]
          data[[current_cols[i]]] <- NULL
        }
        found <- TRUE
        break
      }
    }
    
    if (!found) {
      stop(sprintf("找不到必需的列: %s", expected_col))
    }
  }
  
  # 验证最终的列名
  cat("\n最终的列名:\n")
  print(names(data))
  
  # 检查并转换所有数据列为数值型
  for (col in names(data)) {
    data[[col]] <- as.numeric(as.character(data[[col]]))
    if (all(is.na(data[[col]]))) {
      stop(sprintf("列 '%s' 包含无效数据", col))
    }
  }
  
  # 显示数据基本信息
  cat("\n数据维度:", dim(data), "\n")
  cat("列名:", names(data), "\n")
  cat("\n数据前几行:\n")
  print(head(data))
  
  # 读取数据后,先检查数据的完整性
  cat("\n数据总行数:", nrow(data), "\n")
  
  # 检查每列的数据范围和异常值
  for (col in names(data)) {
    cat("\n", col, "的统计信息:\n")
    cat("最小值:", min(data[[col]], na.rm = TRUE), "\n")
    cat("最大值:", max(data[[col]], na.rm = TRUE), "\n")
    cat("平均值:", mean(data[[col]], na.rm = TRUE), "\n")
    cat("中位数:", median(data[[col]], na.rm = TRUE), "\n")
    cat("NA值数量:", sum(is.na(data[[col]])), "\n")
    
    # 检查异常值(超过3个标准差的值)
    mean_val <- mean(data[[col]], na.rm = TRUE)
    sd_val <- sd(data[[col]], na.rm = TRUE)
    outliers <- data[[col]][abs(data[[col]] - mean_val) > 3 * sd_val]
    if(length(outliers) > 0) {
      cat("可能的异常值:", outliers, "\n")
    }
  }
  
  # 检查x和y轴的数据对应关系
  cat("\n检查站点观测和VPM10km的对应关系:\n")
  correlation <- cor(data$站点观测, data$VPM10km, use = "complete.obs")
  cat("相关系数:", correlation, "\n")
  
  # 创建两个子图的布局
  par(mfrow = c(1, 2))

  # 第一个子图:10km分辨率的对比
  plot_data_10km <- data.frame(
    obs = c(data$站点观测, data$站点观测),
    sim = c(data$VPM10km, data$`MODIS-10KM`),
    type = factor(rep(c("VPM-10km", "MODIS-10km"), each = nrow(data)))
  )
  plot_data_10km <- na.omit(plot_data_10km)

  # 第一个子图的设置和绘制
  max_val_10km <- max(c(plot_data_10km$sim, plot_data_10km$obs))
  plot_range_10km <- c(0, max_val_10km * 1.1)

  plot(NULL,
       xlim = plot_range_10km, ylim = plot_range_10km,
       xlab = "站点观测",
       ylab = "(gC m⁻² 5day⁻¹)",
       main = "10km分辨率产品对比",
       cex.lab = 1.2,
       cex.axis = 1.1,
       cex.main = 1.3,
       font.lab = 2,
       font.main = 2)

  grid(col = "gray90", lty = "dotted")
  abline(0, 1, lty = 2, col = "gray50", lwd = 2)

  # VPM-10km点和拟合线
  points(plot_data_10km$obs[plot_data_10km$type == "VPM-10km"],
         plot_data_10km$sim[plot_data_10km$type == "VPM-10km"],
         pch = 21,
         col = "black",
         bg = rgb(1, 0, 0, 0.5),
         cex = 1.2)

  # MODIS-10km点和拟合线
  points(plot_data_10km$obs[plot_data_10km$type == "MODIS-10km"],
         plot_data_10km$sim[plot_data_10km$type == "MODIS-10km"],
         pch = 21,
         col = "black",
         bg = rgb(0, 0, 1, 0.5),
         cex = 1.2)

  # 添加拟合线
  lm_vpm_10km <- lm(sim ~ obs, data = plot_data_10km[plot_data_10km$type == "VPM-10km",])
  lm_modis_10km <- lm(sim ~ obs, data = plot_data_10km[plot_data_10km$type == "MODIS-10km",])
  abline(lm_vpm_10km, col = "red", lwd = 2)
  abline(lm_modis_10km, col = "blue", lwd = 2)

  # 计算统计指标
  r2_vpm_10km <- round(cor(data$VPM10km, data$站点观测, use = "complete.obs")^2, 3)
  r2_modis_10km <- round(cor(data$`MODIS-10KM`, data$站点观测, use = "complete.obs")^2, 3)
  rmse_vpm_10km <- round(RMSE(data$VPM10km, data$站点观测), 3)
  rmse_modis_10km <- round(RMSE(data$`MODIS-10KM`, data$站点观测), 3)

  # 添加图例
  legend("topleft",
         legend = c(
           "VPM-10km",
           sprintf("R² = %.3f", r2_vpm_10km),
           sprintf("RMSE = %.3f", rmse_vpm_10km),
           "",
           "MODIS-10km",
           sprintf("R² = %.3f", r2_modis_10km),
           sprintf("RMSE = %.3f", rmse_modis_10km)
         ),
         pch = c(21, NA, NA, NA, 21, NA, NA),
         pt.bg = c(rgb(1, 0, 0, 0.5), NA, NA, NA, rgb(0, 0, 1, 0.5), NA, NA),
         col = c("black", NA, NA, NA, "black", NA, NA),
         pt.cex = 1.2,
         bty = "n",
         bg = rgb(1, 1, 1, 0.8),
         cex = 1.1,
         text.font = 1,
         inset = 0.02)

  # 第二个子图:500m分辨率的对比
  plot_data_500m <- data.frame(
    obs = c(data$站点观测, data$站点观测),
    sim = c(data$VPM500m, data$`MODIS-500m`),
    type = factor(rep(c("VPM-500m", "MODIS-500m"), each = nrow(data)))
  )
  plot_data_500m <- na.omit(plot_data_500m)

  # 第二个子图的设置和绘制
  max_val_500m <- max(c(plot_data_500m$sim, plot_data_500m$obs))
  plot_range_500m <- c(0, max_val_500m * 1.1)

  plot(NULL,
       xlim = plot_range_500m, ylim = plot_range_500m,
       xlab = "站点观测",
       ylab = "(gC m⁻² 5day⁻¹)",
       main = "500m分辨率产品对比",
       cex.lab = 1.2,
       cex.axis = 1.1,
       cex.main = 1.3,
       font.lab = 2,
       font.main = 2)

  grid(col = "gray90", lty = "dotted")
  abline(0, 1, lty = 2, col = "gray50", lwd = 2)

  # VPM-500m点和拟合线
  points(plot_data_500m$obs[plot_data_500m$type == "VPM-500m"],
         plot_data_500m$sim[plot_data_500m$type == "VPM-500m"],
         pch = 21,
         col = "black",
         bg = rgb(1, 0, 0, 0.5),
         cex = 1.2)

  # MODIS-500m点和拟合线
  points(plot_data_500m$obs[plot_data_500m$type == "MODIS-500m"],
         plot_data_500m$sim[plot_data_500m$type == "MODIS-500m"],
         pch = 21,
         col = "black",
         bg = rgb(0, 0, 1, 0.5),
         cex = 1.2)

  # 添加拟合线
  lm_vpm_500m <- lm(sim ~ obs, data = plot_data_500m[plot_data_500m$type == "VPM-500m",])
  lm_modis_500m <- lm(sim ~ obs, data = plot_data_500m[plot_data_500m$type == "MODIS-500m",])
  abline(lm_vpm_500m, col = "red", lwd = 2)
  abline(lm_modis_500m, col = "blue", lwd = 2)

  # 计算统计指标
  r2_vpm_500m <- round(cor(data$VPM500m, data$站点观测, use = "complete.obs")^2, 3)
  r2_modis_500m <- round(cor(data$`MODIS-500m`, data$站点观测, use = "complete.obs")^2, 3)
  rmse_vpm_500m <- round(RMSE(data$VPM500m, data$站点观测), 3)
  rmse_modis_500m <- round(RMSE(data$`MODIS-500m`, data$站点观测), 3)

  # 添加图例
  legend("topleft",
         legend = c(
           "VPM-500m",
           sprintf("R² = %.3f", r2_vpm_500m),
           sprintf("RMSE = %.3f", rmse_vpm_500m),
           "",
           "MODIS-500m",
           sprintf("R² = %.3f", r2_modis_500m),
           sprintf("RMSE = %.3f", rmse_modis_500m)
         ),
         pch = c(21, NA, NA, NA, 21, NA, NA),
         pt.bg = c(rgb(1, 0, 0, 0.5), NA, NA, NA, rgb(0, 0, 1, 0.5), NA, NA),
         col = c("black", NA, NA, NA, "black", NA, NA),
         pt.cex = 1.2,
         bty = "n",
         bg = rgb(1, 1, 1, 0.8),
         cex = 1.1,
         text.font = 1,
         inset = 0.02)

  # 恢复原始绘图参数
  par(mfrow = c(1, 1))
  
}, error = function(e) {
  cat("错误信息:", conditionMessage(e), "\n")
  cat("\n请检查:\n")
  cat("1. 文件是否存在\n")
  cat("2. 文件是否为标准CSV格式\n")
  cat("3. 文件是否包含所需的所有列\n")
  cat("4. 数据是否为有效的数值\n")
  cat("5. 文件编码是否正确\n")
})

NSE <- function(Qsim, Qobs){
  # 创建数据框而不是data.table
  df <- data.frame(Qsim = Qsim, Qobs = Qobs)
  df <- na.omit(df)
  
  # 如果数据为空,返回NA
  if(nrow(df) == 0) return(NA)
  
  Qsim <- df$Qsim
  Qobs <- df$Qobs
  
  Qobs_mean <- mean(Qobs)
  Emod <- sum((Qsim - Qobs)^2)
  Eref <- sum((Qobs - Qobs_mean)^2)
  if (Emod == 0 & Eref == 0) {
    NSE <- 0
  } else {
    NSE <- (1 - Emod / Eref)
  }
  NSE
}

RMSE <- function(Qsim, Qobs){
  df <- data.frame(Qsim = Qsim, Qobs = Qobs)
  df <- na.omit(df)
  
  if(nrow(df) == 0) return(NA)
  
  Qsim <- df$Qsim
  Qobs <- df$Qobs
  
  sqrt(mean((Qsim - Qobs)^2))
}

Bias <- function(Qsim, Qobs){
  df <- data.frame(Qsim = Qsim, Qobs = Qobs)
  df <- na.omit(df)
  
  if(nrow(df) == 0) return(NA)
  
  Qsim <- df$Qsim
  Qobs <- df$Qobs
  
  sumQobs <- sum(Qobs)
  sumQsim <- sum(Qsim)
  if (sumQobs == 0) {
    BIAS <- 0
  } else {
    BIAS <- (sumQsim - sumQobs) / sumQobs
  }
  BIAS
}

# 美化后的对比图函数
plot_comparison_base <- function(sim_data, obs_data, title) {
  # 创建数据框
  plot_data <- data.frame(sim = sim_data, obs = obs_data)
  plot_data <- na.omit(plot_data)
  
  # 输出用于拟合的数据点数量
  cat("\n用于", title, "的有效数据点数量:", nrow(plot_data), "\n")
  
  # 计算统计指标
  r2 <- round(cor(plot_data$sim, plot_data$obs)^2, 3)
  rmse <- round(sqrt(mean((plot_data$sim - plot_data$obs)^2)), 3)
  lm_result <- lm(obs ~ sim, data = plot_data)
  slope <- round(coef(lm_result)[2], 3)
  intercept <- round(coef(lm_result)[1], 3)
  
  # 计算绘图范围
  max_val <- max(c(plot_data$sim, plot_data$obs))
  plot_range <- c(0, max_val * 1.1)
  
  # 设置绘图参数
  par(mar = c(5, 5, 4, 2))  # 增加边距
  par(mgp = c(3.5, 1, 0))   # 调整轴标签位置
  
  # 绘制散点图
  plot(plot_data$sim, plot_data$obs,
       xlim = plot_range, ylim = plot_range,
       xlab = "站点观测VS VPM-10KM",
       ylab = "(gC m⁻² 5day⁻¹)",
       main = title,
       pch = 21,              # 圆形带边框的点
       col = "black",         # 点的边框颜色
       bg = rgb(0, 0, 1, 0.5), # 半透明蓝色填充
       cex = 1.2,             # 点的大小
       cex.lab = 1.2,         # 轴标签字体大小
       cex.axis = 1.1,        # 轴刻度字体大小
       cex.main = 1.3,        # 标题字体大小
       font.lab = 2,          # 轴标签字体加粗
       font.main = 2)         # 标题字体加粗
  
  # 添加网格线
  grid(col = "gray90", lty = "dotted")
  
  # 添加1:1线
  abline(0, 1, lty = 2, col = "gray50", lwd = 2)
  
  # 添加拟合线
  abline(lm_result, col = "red", lwd = 2)
  
  # 添加统计信息
  legend_text <- c(
    sprintf("R² = %.3f", r2),
    sprintf("RMSE = %.3f", rmse),
    sprintf("slope = %.3f", slope),
    sprintf("intercept = %.3f", intercept)
  )
  
  # 添加半透明背景的图例框
  legend("topleft",
         legend = legend_text,
         bty = "n",           # 移除边框
         bg = rgb(1, 1, 1, 0.8), # 半透明白色背景
         box.col = "gray70",  # 边框颜色
         cex = 1.1,           # 文字大小
         text.font = 1,       # 文字字体
         inset = 0.02)        # 与边框的距离
  
  # 显示统计指标
  cat("\n", title, "的统计指标:\n")
  cat("R²:", r2, "\n")
  cat("RMSE:", rmse, "\n")
  cat("NSE:", round(NSE(plot_data$sim, plot_data$obs), 3), "\n")
  cat("Bias:", round(Bias(plot_data$sim, plot_data$obs), 3), "\n")
}

​

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值