一、数据输入位置
二、文件格式
excel文件结构如下:
三、出错代码
import os
import datetime
from calendar import monthrange
def parse_filename(file_path):
    """Extract the date encoded at the start of a data file's name.

    Only the base name is inspected. The text before the first underscore
    must consist of three dash-separated integers, e.g.
    ``2018-01-05_xxx.csv``. The values are not checked against the calendar.

    Args:
        file_path: Path of the data file.

    Returns:
        A ``(year, month, day)`` tuple of ints.

    Raises:
        ValueError: If the name does not start with three dash-separated
            integers. The underlying error is attached as ``__cause__``.
    """
    try:
        date_str = os.path.basename(file_path).split('_')[0]
        year, month, day = map(int, date_str.split('-'))
    except (AttributeError, TypeError, ValueError) as exc:
        # Only parsing failures are translated; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        raise ValueError(f"无效文件名格式: {file_path}") from exc
    return year, month, day
def parse_data_time(time_str, file_year, file_month, file_day):
    """Turn a ``<day>日<hour>时`` label into a full ``datetime``.

    The label carries only a day of month and an hour; year and month are
    derived from the date encoded in the file name.

    Args:
        time_str: Label such as ``"5日8时"``; spaces are ignored.
        file_year: Year taken from the file name.
        file_month: Month taken from the file name.
        file_day: Day of month taken from the file name.

    Returns:
        A naive ``datetime.datetime`` with minutes and seconds at zero.

    Raises:
        ValueError: If the label cannot be parsed or does not describe a
            valid date/hour. The underlying error is attached as
            ``__cause__``.
    """
    try:
        time_str = time_str.replace(" ", "")
        pieces = time_str.split("日")
        data_day = int(pieces[0].strip())
        # No month has more than 31 days, so without this guard the
        # month-advancing loop below would never terminate.
        if not 1 <= data_day <= 31:
            raise ValueError(f"day of month out of range: {data_day}")
        hour_part = pieces[1].split("时")[0].strip()
        hour = int(hour_part.split("分")[0]) if "分" in hour_part else int(hour_part)

        current_date = datetime.datetime(file_year, file_month, file_day)
        # NOTE(review): a day number smaller than the file's day only moves
        # the anchor forward by ONE day, so the month changes only when the
        # file date is the last day of its month -- confirm this is intended.
        if data_day < file_day:
            current_date += datetime.timedelta(days=1)
        year = current_date.year
        month = current_date.month
        # Advance to the first month long enough to contain data_day.
        while data_day > monthrange(year, month)[1]:
            month += 1
            if month > 12:
                year += 1
                month = 1
        return datetime.datetime(year, month, data_day, hour)
    except Exception as exc:
        raise ValueError(f"时间解析失败: {time_str}") from exc
def process_files(file_list, start_date, end_date):
    """Collect the station's discharge rows and render them as .bct text.

    Each file is read line by line. Only lines starting with the station
    name are used; column 1 is the time label and column 3 the discharge.
    Rows that cannot be parsed are skipped silently.

    Args:
        file_list: Paths of the CSV files to read.
        start_date: First ``datetime.date`` to keep (inclusive).
        end_date: Last ``datetime.date`` to keep (inclusive).

    Returns:
        The table text (header lines followed by one line per record,
        joined with ``'\\n'``), or a message string when no row qualified.
        The last record is NOT followed by a line terminator.
    """
    station_name = "大通"
    samples = []
    for path in file_list:
        try:
            file_year, file_month, file_day = parse_filename(path)
        except ValueError as err:
            print(err)
            continue
        with open(path, 'r', encoding='utf-8', errors='ignore') as handle:
            for raw in handle:
                if not raw.startswith(station_name):
                    continue
                cols = [cell.strip() for cell in raw.strip().split(',')]
                if len(cols) < 4:
                    continue
                # Either conversion failing drops the row; this catches the
                # same set of exceptions as a bare except.
                try:
                    flow = float(cols[3])
                    stamp = parse_data_time(cols[1], file_year, file_month, file_day)
                except BaseException:
                    continue
                if start_date <= stamp.date() <= end_date:
                    samples.append((stamp, flow))

    if not samples:
        return f"{station_name}所在行数据解析失败"

    samples.sort()
    # Times are written as minutes since midnight of the earliest record.
    origin = samples[0][0].replace(hour=0, minute=0, second=0, microsecond=0)
    rows = []
    for stamp, flow in samples:
        minutes = (stamp - origin).total_seconds() / 60
        # The third column is always written as the constant below.
        rows.append(f" {minutes:.7e} {flow:.7e} 9.9999900e+02")

    # NOTE(review): field spacing is reproduced exactly as in this listing;
    # verify the padding against a file exported by the target software.
    header = [
        "table-name 'Boundary Section : 1'",
        "contents 'Uniform '",
        "location 'totalDischarge '",
        "time-function 'non-equidistant'",
        "reference-time " + origin.strftime("%Y%m%d"),
        "time-unit 'minutes'",
        "interpolation 'linear'",
        "parameter 'time ' unit '[min]'",
        "parameter 'total discharge (t) end A' unit '[m3/s]'",
        "parameter 'total discharge (t) end B' unit '[m3/s]'",
        f"records-in-table {len(rows)}",
    ]
    return '\n'.join(header + rows)
def get_date(prompt):
    """Ask the user for a date in ``YYYYMMDD`` form until one is accepted.

    Args:
        prompt: Text shown to the user before reading a line.

    Returns:
        The parsed ``datetime.date``, or ``None`` when the user enters an
        empty (or whitespace-only) line.
    """
    while True:
        # Strip so stray spaces around the value, or a line of only spaces,
        # are not reported as a format error.
        date_str = input(prompt).strip()
        if not date_str:
            return None
        try:
            return datetime.datetime.strptime(date_str, "%Y%m%d").date()
        except ValueError:
            print("日期格式错误,请重新输入!")
if __name__ == "__main__":
    # Gather every regular *.csv file directly inside the data directory.
    dir_path = "../2018"
    file_list = []
    for entry in os.listdir(dir_path):
        candidate = os.path.join(dir_path, entry)
        if entry.endswith('.csv') and os.path.isfile(candidate):
            file_list.append(candidate)

    # An empty answer means "no bound" on that side of the date range.
    start_date_input = get_date("请输入起始日期(格式:YYYYMMDD,连按两次回车默认全部输出):")
    end_date_input = get_date("请输入结束日期(格式:YYYYMMDD,连按两次回车默认全部输出):")
    start_date = start_date_input if start_date_input else datetime.date.min
    end_date = end_date_input if end_date_input else datetime.date.max
    if end_date < start_date:
        print("起始日期不能比结束日期晚!")
        exit()

    result = process_files(file_list, start_date, end_date)

    # The output file is named after the requested date range.
    start_str = start_date.strftime('%Y%m%d')
    end_str = end_date.strftime('%Y%m%d')
    output_file = start_str + "_" + end_str + ".bct"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(result)
    print(repr(result))
    print(f"已保存至{output_file}")
刚开始用的如上代码生成bct流量文件,但系统貌似不能识别我的文件,一直提示:
All residing boundary data retained.
四、问题分析
1、统一变量
手动输入一天内的流量数据并从软件导出bct文件,代码也只选择同一天的数据导出
2、文件比较
系统生成的文件大小:
代码生成:
将文件后缀改为txt并比对两文件大小,不难看出我的代码生成的文件可能缺少关键标识,接下来只需逐字节对比两文件的二进制内容即可,对比代码如下:
def compare(file1, file2, output_diff_file=None):
    """Compare two files byte by byte and report every differing position.

    The sizes of both files are printed first. Differences are written to
    ``output_diff_file`` when it is given, otherwise printed one per line.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.
        output_diff_file: Optional path of a UTF-8 text file that receives
            the report.

    Returns:
        The list of difference descriptions (empty when the files are
        identical). Positions are 1-based; a byte missing because one file
        is shorter is shown as ``None``.
    """
    with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
        bytes1 = f1.read()
        bytes2 = f2.read()
    len1 = len(bytes1)
    len2 = len(bytes2)
    print(f' {file1}: {len1} 字节')
    print(f' {file2}: {len2} 字节')

    differences = []
    for i in range(max(len1, len2)):
        # Past the end of the shorter file the byte is reported as None.
        b1 = bytes1[i] if i < len1 else None
        b2 = bytes2[i] if i < len2 else None
        if b1 != b2:
            differences.append(f'第{i+1}字节处: {b1}————{b2}')

    if not differences:
        print('两文件相同')
    elif output_diff_file:
        with open(output_diff_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(differences))
        print(f'差异见文件{output_diff_file}')
    else:
        for diff in differences:
            print(diff)
    return differences
# Run the comparison only when this listing is executed as a script, so that
# importing it does not try to open the two files.
if __name__ == "__main__":
    compare('./111 - 副本.txt', './333.txt', output_diff_file='binary_diff.txt')
运行结果
第871字节处: 13————None
第872字节处: 10————None
查阅一下13和10代表的字符:
13 对应字符 CR(\r,回车)。
10 对应字符 LF(\n,换行符)。
原来是文件的最后一行没有加换行符(CR LF),系统识别不到结束标识,才引发上述问题。
五、最终代码
关键改动:
# Terminate the last line as well. Append '\n' rather than '\r\n': the file
# is written in text mode, which already turns '\n' into CR LF on Windows,
# so a literal '\r\n' would end up on disk as '\r\r\n'.
if output and not output[-1].endswith('\n'):
    output[-1] += '\n'
完整代码如下:
import os
import datetime
from calendar import monthrange
def parse_filename(file_path):
    """Extract the date encoded at the start of a data file's name.

    Only the base name is inspected. The text before the first underscore
    must consist of three dash-separated integers, e.g.
    ``2018-01-05_xxx.csv``. The values are not checked against the calendar.

    Args:
        file_path: Path of the data file.

    Returns:
        A ``(year, month, day)`` tuple of ints.

    Raises:
        ValueError: If the name does not start with three dash-separated
            integers. The underlying error is attached as ``__cause__``.
    """
    try:
        date_str = os.path.basename(file_path).split('_')[0]
        year, month, day = map(int, date_str.split('-'))
    except (AttributeError, TypeError, ValueError) as exc:
        # Only parsing failures are translated; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        raise ValueError(f"无效文件名格式: {file_path}") from exc
    return year, month, day
def parse_data_time(time_str, file_year, file_month, file_day):
    """Turn a ``<day>日<hour>时`` label into a full ``datetime``.

    The label carries only a day of month and an hour; year and month are
    derived from the date encoded in the file name.

    Args:
        time_str: Label such as ``"5日8时"``; spaces are ignored.
        file_year: Year taken from the file name.
        file_month: Month taken from the file name.
        file_day: Day of month taken from the file name.

    Returns:
        A naive ``datetime.datetime`` with minutes and seconds at zero.

    Raises:
        ValueError: If the label cannot be parsed or does not describe a
            valid date/hour. The underlying error is attached as
            ``__cause__``.
    """
    try:
        time_str = time_str.replace(" ", "")
        pieces = time_str.split("日")
        data_day = int(pieces[0].strip())
        # No month has more than 31 days, so without this guard the
        # month-advancing loop below would never terminate.
        if not 1 <= data_day <= 31:
            raise ValueError(f"day of month out of range: {data_day}")
        hour_part = pieces[1].split("时")[0].strip()
        hour = int(hour_part.split("分")[0]) if "分" in hour_part else int(hour_part)

        current_date = datetime.datetime(file_year, file_month, file_day)
        # NOTE(review): a day number smaller than the file's day only moves
        # the anchor forward by ONE day, so the month changes only when the
        # file date is the last day of its month -- confirm this is intended.
        if data_day < file_day:
            current_date += datetime.timedelta(days=1)
        year = current_date.year
        month = current_date.month
        # Advance to the first month long enough to contain data_day.
        while data_day > monthrange(year, month)[1]:
            month += 1
            if month > 12:
                year += 1
                month = 1
        return datetime.datetime(year, month, data_day, hour)
    except Exception as exc:
        raise ValueError(f"时间解析失败: {time_str}") from exc
def process_files(file_list, start_date, end_date):
    """Collect the station's discharge rows and render them as .bct text.

    Each file is read line by line. Only lines starting with the station
    name are used; column 1 is the time label and column 3 the discharge.
    Rows whose discharge or time label cannot be parsed are skipped.

    Args:
        file_list: Paths of the CSV files to read.
        start_date: First ``datetime.date`` to keep (inclusive).
        end_date: Last ``datetime.date`` to keep (inclusive).

    Returns:
        The table text: header lines followed by one line per record. Every
        line, including the last record, ends with ``'\\n'``, so writing the
        string through a text-mode file gives each line the platform's line
        ending (CR LF on Windows). When no row qualified, a message string
        is returned instead of a table.
    """
    station_name = "大通"
    valid_data = []
    for file in file_list:
        try:
            file_year, file_month, file_day = parse_filename(file)
        except ValueError as e:
            print(e)
            continue
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
            for line in f:
                if not line.startswith(station_name):
                    continue
                cols = [col.strip() for col in line.strip().split(',')]
                if len(cols) < 4:
                    continue
                # Both conversions signal bad input with ValueError; anything
                # else (e.g. KeyboardInterrupt) is allowed to propagate.
                try:
                    discharge = float(cols[3])
                    dt = parse_data_time(cols[1], file_year, file_month, file_day)
                except ValueError:
                    continue
                if start_date <= dt.date() <= end_date:
                    valid_data.append((dt, discharge))

    if not valid_data:
        return f"{station_name}所在行数据解析失败"

    valid_data.sort()
    # Times are written as minutes since midnight of the earliest record.
    base_time = valid_data[0][0].replace(hour=0, minute=0, second=0, microsecond=0)
    reference_date = base_time.strftime("%Y%m%d")
    records = []
    for dt, q in valid_data:
        delta_time = (dt - base_time).total_seconds() / 60
        # The third column is always written as the constant below.
        records.append(f" {delta_time:.7e} {q:.7e} 9.9999900e+02")

    # NOTE(review): field spacing is reproduced exactly as in this listing;
    # verify the padding against a file exported by the target software.
    output = [
        "table-name 'Boundary Section : 1'",
        "contents 'Uniform '",
        "location 'totalDischarge '",
        "time-function 'non-equidistant'",
        f"reference-time {reference_date}",
        "time-unit 'minutes'",
        "interpolation 'linear'",
        "parameter 'time ' unit '[min]'",
        "parameter 'total discharge (t) end A' unit '[m3/s]'",
        "parameter 'total discharge (t) end B' unit '[m3/s]'",
        f"records-in-table {len(records)}",
    ] + records

    # Terminate the last line too. A literal '\r\n' is deliberately avoided:
    # mixed with the '\n' separators it becomes '\r\r\n' once a text-mode
    # file on Windows expands '\n' to CR LF.
    return '\n'.join(output) + '\n'
def get_date(prompt):
    """Ask the user for a date in ``YYYYMMDD`` form until one is accepted.

    Args:
        prompt: Text shown to the user before reading a line.

    Returns:
        The parsed ``datetime.date``, or ``None`` when the user enters an
        empty (or whitespace-only) line.
    """
    while True:
        # Strip so stray spaces around the value, or a line of only spaces,
        # are not reported as a format error.
        date_str = input(prompt).strip()
        if not date_str:
            return None
        try:
            return datetime.datetime.strptime(date_str, "%Y%m%d").date()
        except ValueError:
            print("日期格式错误,请重新输入!")
if __name__ == "__main__":
    # Gather every regular *.csv file directly inside the data directory.
    dir_path = "../2018"
    file_list = [
        os.path.join(dir_path, name)
        for name in os.listdir(dir_path)
        if name.endswith('.csv') and os.path.isfile(os.path.join(dir_path, name))
    ]

    # An empty answer means "no bound" on that side of the date range.
    start_date_input = get_date("请输入起始日期(格式:YYYYMMDD,连按两次回车默认全部输出):")
    end_date_input = get_date("请输入结束日期(格式:YYYYMMDD,连按两次回车默认全部输出):")
    start_date = start_date_input or datetime.date.min
    end_date = end_date_input or datetime.date.max
    if start_date > end_date:
        print("起始日期不能比结束日期晚!")
        # exit() is a helper injected by the site module for interactive
        # use; raise SystemExit directly and report failure to the shell.
        raise SystemExit(1)

    result = process_files(file_list, start_date, end_date)

    # The output file is named after the requested date range.
    start_str = start_date.strftime('%Y%m%d')
    end_str = end_date.strftime('%Y%m%d')
    output_file = f"{start_str}_{end_str}.bct"

    # Normalise to '\n' and make sure the last line is terminated, then let
    # the file object emit CR LF for every line on every platform. This
    # avoids both a missing final terminator and a doubled '\r\r\n'.
    text = result.replace('\r\n', '\n')
    if not text.endswith('\n'):
        text += '\n'
    with open(output_file, "w", encoding="utf-8", newline="\r\n") as f:
        f.write(text)
    print(repr(result))
    print(f"结果已保存至文件: {output_file}")