# -*- coding: utf-8 -*-
# @Time : 2024/3/9 16:30
# @Author : wangz
# @File : jason_read_and_make.py
from openpyxl import load_workbook
import csv
import numpy as np
import csv
import json
import pandas as pd
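# NOTE: an encoding declaration is only honoured on the first two lines of a file, so a "coding: gbk" marker here has no effect; this file is UTF-8 (see line 1).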
import sklearn.metrics
def csv_to_json(file_path, json_filepath):
    """Convert a CSV file into a JSON file holding a list of row objects.

    The first CSV row is treated as the header and supplies the keys of each
    row dictionary. If that header row is empty, keys fall back to
    ``column_0``, ``column_1``, ... based on the cell position.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.
        json_filepath: Path of the JSON file to write (UTF-8, indent of 4,
            non-ASCII characters kept as-is).

    Returns:
        The list of row dictionaries that was written to ``json_filepath``.
        An empty CSV file yields an empty list.
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing line breaks are parsed correctly.
    with open(file_path, mode='r', newline='', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file)
        # Default to None so an empty file does not raise StopIteration.
        headers = next(csv_reader, None)
        print(headers)
        if headers:
            # zip() stops at the shorter of header/row, so surplus cells are dropped.
            data = [dict(zip(headers, row)) for row in csv_reader]
        else:
            data = [
                {f'column_{i}': value for i, value in enumerate(row)}
                for row in csv_reader
            ]

    with open(json_filepath, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)
    return data
def pandas_read(file_path):
    """Load a UTF-8 CSV file with pandas, print it, and return it.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(file_path, encoding='utf-8')
    print(frame)
    # Hand the frame back so callers can use the data, not only see it printed.
    return frame
def csv_to_txt(input_csv, output_txt):
    """Write the first column of every non-empty CSV row to a text file.

    Each non-empty row is echoed to stdout, then its first cell is written
    to ``output_txt`` followed by a newline. Both files are UTF-8.

    Args:
        input_csv: Path of the CSV file to read.
        output_txt: Path of the text file to create or overwrite.
    """
    with open(input_csv, 'r', newline='', encoding='utf-8') as source, \
            open(output_txt, 'w', encoding='utf-8') as sink:
        for record in csv.reader(source):
            # Blank lines come back from csv.reader as empty lists; skip them.
            if not record:
                continue
            print(record)
            sink.write(record[0] + '\n')
def csv_to_csv(input_csv, output_csv, max_rows=3000):
    """Copy the leading rows of one CSV file into another.

    Rows are copied whole and unchanged (blank lines included) until either
    ``max_rows`` rows have been written or the input is exhausted.

    Args:
        input_csv: Path of the UTF-8 CSV file to read.
        output_csv: Path of the UTF-8 CSV file to create or overwrite.
        max_rows: Maximum number of rows to copy. Defaults to 3000, the
            limit that was previously hard-coded.

    Returns:
        The number of rows actually written.
    """
    copied = 0
    with open(input_csv, 'r', newline='', encoding='utf-8') as infile, \
            open(output_csv, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        for row in csv.reader(infile):
            if copied >= max_rows:
                break
            # Write the row as-is; indexing row[0] here would raise
            # IndexError on blank lines, which csv.reader yields as [].
            writer.writerow(row)
            copied += 1
    # Report the real count instead of a fixed (and outdated) number.
    print("First", copied, "rows have been saved to", output_csv)
    return copied
def is_row_empty(row):
    """Return True when every cell in ``row`` is None or a blank string.

    A cell counts as blank when it is ``None`` or a string containing only
    whitespace. Any other value (for example a number) counts as content.
    An empty row (no cells) is considered empty.

    Args:
        row: Iterable of cell values.

    Returns:
        True if all cells are blank, otherwise False.
    """
    # Check None and the type before calling strip(); calling it directly
    # would raise AttributeError on None or on non-string cells.
    return all(
        cell is None or (isinstance(cell, str) and not cell.strip())
        for cell in row
    )
def is_cell_empty(cell):
    """Return True if ``cell`` is None or a whitespace-only string.

    Values that are neither None nor a string are never considered empty.
    """
    if cell is None:
        return True
    if isinstance(cell, str):
        # A string is empty when nothing remains after stripping whitespace.
        return not cell.strip()
    return False
def check_empty_rows_in_csv(file_path):
    """Find the rows of a CSV file that are blank or contain a blank cell.

    A row is flagged when it has no cells at all (a blank line) or when at
    least one of its cells is empty or whitespace-only. Every column is
    inspected, and rows may have differing lengths. For each flagged row
    the 0-based row index followed by ``'no'`` is printed.

    Args:
        file_path: Path of the UTF-8 CSV file to inspect.

    Returns:
        A list of the 0-based indices of the flagged rows. An empty file
        yields an empty list.
    """
    flagged = []
    with open(file_path, 'r', newline='', encoding='utf-8') as csvfile:
        for index, row in enumerate(csv.reader(csvfile)):
            # csv.reader always yields strings, so a missing value is an
            # empty string, not NaN; a plain strip() test is the right check.
            if not row or any(not cell.strip() for cell in row):
                print(str(index) + 'no')
                flagged.append(index)
    return flagged
def ps_csv(file_path):
    """Read a CSV file with pandas using its default settings.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    # Return the frame rather than binding it to an unused local.
    return pd.read_csv(file_path)
# Input/output locations used by the script entry point below.
csv_filepath = r'C:\Users\wangz\Desktop\train.csv'  # replace with the path of your CSV file
json_filepath = r'C:\Users\wangz\Desktop\train.json'  # path of the JSON file to produce
input_csv = r'C:\Users\wangz\Downloads\JDtest-100.csv'
output_txt = r'C:\Users\wangz\Downloads\JDtest.txt'

# Only do file I/O when executed as a script, so importing this module for
# its helper functions does not touch the hard-coded paths above.
if __name__ == "__main__":
    # csv_to_json(csv_filepath, json_filepath)
    # pandas_read(csv_filepath)
    csv_to_txt(input_csv, output_txt)
    # input_csv = r'C:\Users\wangz\Downloads\JDtrain.csv'
    # output_csv = r'C:\Users\wangz\Downloads\JDtest-100.csv'
    # csv_to_csv(input_csv, output_csv)
    # Run the empty-row check:
    # check_empty_rows_in_csv(output_csv)