# 代码如下
import os
import pandas as pd
import re
# Folder whose files are scanned.  Raw strings keep the Windows backslashes
# literal; without the r-prefix "\x" starts a (malformed) hex escape.
filedir = r'D:\xxxx'
# Destination of the exported CSV.
output_path = r"D:xxx\output3.csv"

# Start/end markers (regex fragments) delimiting the "kind" text of a post.
keystart1 = 'xxx'
keyend1 = 'xx'
# Start/end markers (regex fragments) delimiting the "content" text of a post.
keystart2 = 'xxxx'
keyend2 = 'xxxxx'

# Used to find the kind when the primary kind markers match nothing.
_fallback_kind_pattern = re.compile('类别(.*?)种类', re.S)


def _read_text(path):
    """Return the text of *path* decoded as GBK, dropping undecodable bytes."""
    with open(path, encoding='gbk', errors='ignore') as handle:
        return handle.read()


def _extract_kind(text, kind_pattern):
    """Return the kind of a post.

    The result is the list of all ``kind_pattern`` matches when there is at
    least one.  Otherwise it is the single string captured by the fallback
    pattern, or an empty list when that does not match either.
    """
    matches = kind_pattern.findall(text)
    if matches:
        return matches
    fallback = _fallback_kind_pattern.search(text)
    if fallback is not None:
        return fallback.group(1)
    return matches


def build_table(folder, kind_markers=None, content_markers=None):
    """Scan every file in *folder* and collect title, kind and content.

    Args:
        folder: Directory whose regular files are read; sub-directories are
            skipped.
        kind_markers: ``(start, end)`` regex fragments around the kind text.
            Defaults to ``(keystart1, keyend1)``.
        content_markers: ``(start, end)`` regex fragments around the content
            text.  Defaults to ``(keystart2, keyend2)``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``post_title`` (file name
        without extension), ``post_kind`` and ``post_content`` (list of all
        content matches), one row per file.
    """
    if kind_markers is None:
        kind_markers = (keystart1, keyend1)
    if content_markers is None:
        content_markers = (keystart2, keyend2)

    # Compile once instead of once per file.
    kind_pattern = re.compile(kind_markers[0] + '(.*?)' + kind_markers[1], re.S)
    content_pattern = re.compile(
        content_markers[0] + '(.*?)' + content_markers[1], re.S)

    post_title = []
    post_kind = []
    post_content = []

    for index, filename in enumerate(os.listdir(folder), start=1):
        print(index)  # progress counter
        filepath = os.path.join(folder, filename)
        if not os.path.isfile(filepath):
            # open() cannot read a directory; ignore such entries.
            continue
        print(os.path.splitext(filepath)[0])

        # Read the file a single time and reuse the text for every search; a
        # second read() on an exhausted handle would only return ''.
        text = _read_text(filepath)

        post_title.append(os.path.splitext(filename)[0])
        post_content.append(content_pattern.findall(text))
        post_kind.append(_extract_kind(text, kind_pattern))

    print(post_title)
    return pd.DataFrame({'post_title': post_title,
                         'post_kind': post_kind,
                         'post_content': post_content
                         })


def main():
    """Build the table for ``filedir`` and export it to ``output_path``."""
    df = build_table(filedir)
    print(df)
    # utf_8_sig writes a BOM -- presumably so that Excel detects the encoding.
    df.to_csv(output_path, encoding='utf_8_sig')
    print('########导出完成############')


if __name__ == '__main__':
    main()