最近拿到一个上游的xlsx, 需要自己加工处理取到自己想要的值,需要注意的知识点做个记录:
以下内容基于以下假设:
1, 文件名字为Data_A.xlsx
和Data_B.xlsx
, 其内容格式为:
A | B | C | D E | F |
---|---|---|---|---|
0x101 | 10 | 1 | 1024 | |
0x201 | 11 | 1 | 117 | |
0x301 | 12 | 3 | 31 | |
0x401 | 13 | 2 | 41 | |
0x501 | 14 | 1 | 126 | |
0x601 | 15 | 4 | 13 |
0 主要程序框架
def clean():
current_path = os.getcwd()
for infile in glob.glob( os.path.join(current_path, '*.csv') ):
os.remove(infile)
if __name__ == '__main__':
clean()
sys.exit()
1 从用户输入的cmd line读取xlsx文件
def parse_input(argv):
inputfile = ''
try:
opts, args = getopt.getopt(sys.argv[1:],"hi:",["help","ifile="])
except getopt.GetoptError:
sys.exit(2)
for opt, arg in opts:
if opt in ("-h","--help"):
print('python3 parse_xlsx.py -i <inputfile>')
sys.exit()
elif opt in ("-i", "--ifile"):
inputfile = arg
return inputfile
2 读取xlsx文件为raw.csv
文件
def parse_xlsx(file):
current_path = os.getcwd()
filepath = current_path+'\\new.csv'
if (os.path.exists(filepath)) :
os.remove(filepath)
rawdf = pd.read_excel(file, sheet_name=0)
DataFrame(rawdf).to_csv('raw.csv', index=False, header=0)
3 读取raw.csv
文件
newdf = pd.read_csv('raw.csv', sep=',', header=0, names=None)
4 将D E
列进行处理:以此列的值作为排序依据先进行排序
newdf = pd.read_csv('raw.csv', sep=',', header=0, names=None)
newdf_temp = newdf
if any(newdf_temp['D E'] == 2):
newdf_order_1 = newdf_temp.loc[newdf_temp['D E'] == 1]
newdf_order_2 = newdf_temp.loc[newdf_temp['D E'] == 2]
newdf = newdf_order_1._append(newdf_order_2, ignore_index=True)
newdf_order_3 = newdf_temp.loc[newdf_temp['D E'] == 3]
newdf = newdf._append(newdf_order_3, ignore_index=True)
newdf_order_4 = newdf_temp.loc[newdf_temp['D E'] == 4]
newdf = newdf._append(newdf_order_4, ignore_index=True)
5 将A
列进行处理:去掉0x
, 并按8位对齐
newdf['a'] = newdf['A'].astype('str') # the value of A column will handled as string
newdf['a'] = newdf['a'].str.replace(r"0x", "")
newdf_a = newdf['a'].str.zfill(8)
6 将B
列进行处理:转成十六进制数, 并按8位对齐
newdf['b'] = newdf['B'].astype('int')
newdf['b'] = newdf['b'].apply( hex )
newdf['b'] = newdf['b'].str.replace(r"0x", "")
newdf_b = newdf['b'].str.zfill(8)
7 将C
列进行处理:全部为NaN, 忽略
8 将F
列进行处理:转成十六进制数, 并按8位对齐
newdf['f'] = newdf['F'].astype('int')
newdf['f'] = newdf['f'].apply( hex )
newdf['f'] = newdf['f'].str.replace(r"0x", "")
newdf_f = newdf['f'].str.zfill(8)
9 把上面处理完的A
、B
、F
列进行拼接,结果存进new.csv
此处要注意拼接的前后顺序,注意是列拼接
newdf = newdf_a + newdf_b + newdf_f
DataFrame(newdf).to_csv('new.csv', index=False, header=False)
10 区分A
和B
两个文件
def distinguish(file):
if "a".upper() in file.upper():
bfile = "A.bin"
elif "b".upper() in file.upper():
bfile = "B.bin"
else:
print("Please confirm input is a or b")
11 将传入的数据每8位作为一组,进行大小端转化
def byteorder_convert(para):
little_hex = bytearray.fromhex(para)
i =0
tmp = bytearray()
result = bytearray()
while i < len(little_hex):
tmp = little_hex[i:i+4]
tmp.reverse()
result = result + tmp
# print(result)
i += 4
str_little = "".join(format(x, "02x") for x in result)
return str_little
12.a 对每一行进行处理,结果存进bfile
中
def binary(bfile):
with open('new.csv', 'r') as f:
reader = csv.reader(f)
# print(type(reader))
file = open(bfile, "wb")
for row in reader:
row = byteorder_convert(''.join(row))
# print(row)
result = ''
t = '\\x'
i = 0
if len(row)%2 == 1:
print('String length illegal')
while i < len(row):
result = result + t + row[i:i+2]
i = i+2
s = result
b = eval('b' + '\'' + s + '\'')
# print(result)
# print(b)
file.write(bytes(b))
file.close()
print("Done! Please check", bfile)
12.b 对每一行进行处理,结果存进bfile
中
def array(file, newdf_a, newdf_b, newdf_f):
if "a".upper() in file.upper():
afile = "new_a.c"
elif "b".upper() in file.upper():
afile = "new_b.c"
else:
print("Please confirm input is a or b")
newdf = "{" + ".a = "+ newdf_a + ", .b = " + newdf_b + ", .f = " + newdf_f + "}"
DataFrame(newdf).to_csv('new.csv', index=False, header=False)
newdf = pd.read_csv('new.csv', sep=',', header=0, names=None)
# print(newdf)
DataFrame(newdf).to_csv(afile, sep=' ', index=False, header=False, quoting=csv.QUOTE_NONE, escapechar=' ')
print("Done! Please check", afile)
13 删除过程中的csv
中间文件
def clean():
current_path = os.getcwd()
for infile in glob.glob( os.path.join(current_path, '*.csv') ):
os.remove(infile)