# Remove trajectory outliers and fill time gaps with cubic-spline interpolation.


from matplotlib import pyplot as plt
import pickle
from scipy.interpolate import CubicSpline
from torch.utils.data import Dataset
from scipy import interpolate
from datetime import datetime
import numpy as np
import torch
import pandas as pd
import os

# Input and output folder paths.
input_folder = './data/dataset/raw_78/'
output_folder = './data/dataset/output78'

# Per-column validity checks; a row is dropped if any check fails.
COLUMN_CONDITIONS = {
    'lon': lambda x: 0 <= x <= 180,
    'lat': lambda x: 0 <= x < 90,
    'sog': lambda x: 0 < x,
    'cog': lambda x: 0 < x <= 360,
}

# Columns that are interpolated across time gaps, and the columns written out.
# Selected by name (not by position) so they always agree with each other.
INTERPOLATE_COLUMNS = ['lon', 'lat', 'rot', 'sog', 'cog']
OUTPUT_COLUMNS = INTERPOLATE_COLUMNS + ['timestamp']

TIME_THRESHOLD = 600          # gap size (seconds) above which rows are inserted
INTERPOLATION_STEP = '10min'  # spacing of the inserted rows


def remove_outliers(data):
    """Return *data* without the rows that fail any check in COLUMN_CONDITIONS.

    Raises:
        KeyError: if one of the checked columns is missing from *data*.
    """
    for column, is_valid in COLUMN_CONDITIONS.items():
        keep = data[column].apply(is_valid).astype(bool)
        data = data[keep]
    return data


def add_time_columns(data):
    """Return a copy of *data* with 'timestamp' and 'time_diff' columns added.

    'timestamp' is the 'time' column parsed with the '%Y/%m/%d %H:%M' format;
    'time_diff' is the number of seconds since the previous row (NaN for the
    first row).
    """
    data = data.copy()  # avoid writing into a filtered slice of the caller's frame
    data['timestamp'] = pd.to_datetime(data['time'], format='%Y/%m/%d %H:%M')
    data['time_diff'] = data['timestamp'].diff().dt.total_seconds()
    return data


def fill_time_gaps(data, time_threshold=TIME_THRESHOLD, step=INTERPOLATION_STEP):
    """Insert interpolated rows wherever consecutive rows are too far apart.

    Args:
        data: frame holding OUTPUT_COLUMNS plus the 'time_diff' column.
        time_threshold: gaps strictly longer than this many seconds are filled.
        step: pandas frequency string giving the spacing of the inserted rows,
            counted from the row that precedes the gap.

    Returns:
        A frame with OUTPUT_COLUMNS containing the original rows and the
        inserted rows (values rounded to 4 decimals), sorted by 'timestamp'.
    """
    timestamps = data['timestamp']
    # Hoisted: the raw arrays do not change while gaps are processed.
    column_values = {col: data[col].values for col in INTERPOLATE_COLUMNS}
    pieces = [data[OUTPUT_COLUMNS]]

    # Positional indices; 'time_diff' is NaN at position 0, so pos >= 1 here.
    gap_positions = np.where(data['time_diff'] > time_threshold)[0]
    for pos in gap_positions:
        start_time = timestamps.iloc[pos - 1]
        end_time = timestamps.iloc[pos]

        # Keep only points strictly inside the gap. Slicing with [1:-1] would
        # drop a valid point whenever the gap is not a multiple of the step.
        grid = pd.date_range(start=start_time, end=end_time, freq=step)
        new_timestamps = grid[(grid > start_time) & (grid < end_time)]
        if len(new_timestamps) == 0:
            continue

        knots = [start_time.timestamp(), end_time.timestamp()]
        query = [ts.timestamp() for ts in new_timestamps]
        interpolated = {}
        for col, values in column_values.items():
            # NOTE(review): the spline is fitted on the two end points only,
            # which under SciPy's default boundary condition should amount to
            # a straight line between them - confirm this is intended.
            spline = CubicSpline(knots, [values[pos - 1], values[pos]])
            interpolated[col] = np.round(spline(query), decimals=4)

        new_rows = pd.DataFrame(interpolated)
        new_rows['timestamp'] = new_timestamps.values
        pieces.append(new_rows)

    filled = pd.concat(pieces)
    # Stable sort keeps rows with equal timestamps in their original order.
    return filled.sort_values(by='timestamp', kind='mergesort')


def process_file(input_file_path, output_file_path):
    """Clean one CSV file, fill its time gaps and write the result as CSV."""
    try:
        # Only the read can raise the decoding error handled below.
        data = pd.read_csv(input_file_path, encoding='utf-8')
    except UnicodeDecodeError:
        print(f"文件 {input_file_path} 的编码不兼容。请尝试其他编码方式。")
        return

    data = add_time_columns(remove_outliers(data))
    result = fill_time_gaps(data)
    print(result)
    result.to_csv(output_file_path, index=False, encoding='utf-8')
    print("finish")


def main():
    """Process every regular file found in input_folder into output_folder."""
    os.makedirs(output_folder, exist_ok=True)
    for input_file in os.listdir(input_folder):
        input_file_path = os.path.join(input_folder, input_file)
        if not os.path.isfile(input_file_path):
            continue  # skip sub-directories
        process_file(input_file_path, os.path.join(output_folder, input_file))


if __name__ == "__main__":
    main()

# (Web-page residue that followed the script - comment box and payment widget text - removed; it was not part of the program.)