文件处理篇python

本文介绍如何使用Python进行文件处理,包括:读取data1.csv,去除每行的空格、按列逆序排列、以分号分隔后保存为data3.csv;用两种方法统计latex.log文件的总行数、非空行数及不重复的非空行数;随机生成调查问卷结果并统计各评语出现的次数;创建通信录csv文件;随机生成谜语试卷及答卷;以及统计《哈姆雷特》中出现频率最高的前10个单词。

 请将文件data1.csv的每行的空格去掉并按照列逆序排列,数据之间用分号隔开,将结果存放在data3.csv文件并输出。


import csv

def readfile(filename):
    """Return the entire content of *filename* as a single string.

    The file is decoded with the ``utf-8-sig`` codec, so a leading
    byte-order mark, if present, is not part of the returned text.
    """
    with open(filename, mode="r", encoding="utf-8-sig") as src:
        return src.read()

def filereverse(filename1,filename2):
    """Write a cleaned, column-reversed copy of a comma-separated file.

    For every line of *filename1* the spaces are removed, the line is split
    on commas, the fields are put in reverse order and joined with
    semicolons. The result is written to *filename2*, one line per input
    line, each terminated by a newline.

    Args:
        filename1: Path of the comma-separated input file.
        filename2: Path of the semicolon-separated output file; it is
            overwritten if it already exists.
    """
    # Read everything first so the input is closed before the output opens.
    with open(filename1, "r", encoding="utf-8-sig") as src:
        lines = src.readlines()
    with open(filename2, "w", encoding="utf-8-sig") as dst:
        for line in lines:
            # Drop the line terminator and every space before splitting.
            fields = line.rstrip("\n").replace(" ", "").split(",")
            dst.write(";".join(reversed(fields)) + "\n")

# Input and output paths, relative to the current working directory.
filepath1="txt\\data1.csv"
filepath2="txt\\data3.csv"

# Show the source file before it is transformed.
text=readfile(filepath1)
print("{}文件:\n{}".format(filepath1,text))

# Produce the transformed copy, then read it back and show it.
filereverse(filepath1,filepath2)
text=readfile(filepath2)
print("{}文件:\n{}".format(filepath2,text))


用两种方法求文件“latex.log”的文件总行数、除了空行以外的文件行数、除了空行以外的不重复的文件行数。

def filelines1(filename):
    """Count the lines of a text file while streaming it line by line.

    A line is blank when it holds nothing but its line terminator. Lines
    are compared without their terminator, so a final line that lacks a
    trailing newline still matches an identical earlier line.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        A tuple ``(total, non_blank, unique_non_blank)`` of line counts.
    """
    total = 0
    non_blank = 0
    seen = set()  # a set keeps the duplicate check O(1) per line
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            total += 1
            content = line.rstrip('\n')
            if content:
                non_blank += 1
                seen.add(content)
    return (total, non_blank, len(seen))

def filelines2(filename):
    """Count the lines of a text file after reading it into a list.

    A line is blank when it holds nothing but its line terminator. Lines
    are compared without their terminator, so a final line that lacks a
    trailing newline still matches an identical earlier line.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        A tuple ``(total, non_blank, unique_non_blank)`` of line counts.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lst = f.readlines()
    cnt1 = len(lst)
    # Filter out every blank line; list.remove would drop only the first
    # one and raise ValueError when the file has none.
    non_blank = [line.rstrip('\n') for line in lst if line != '\n']
    cnt2 = len(non_blank)
    # Deduplicate the lines themselves, not the characters of the path.
    cnt3 = len(set(non_blank))
    return (cnt1, cnt2, cnt3)

# Log file to analyse, relative to the current working directory.
filepath="TXT\\latex.log"

# Method 1: stream the file one line at a time.
print("方法一:文件一行一行访问")
cnt1,cnt2,cnt3=filelines1(filepath)
print("{} 文件总行数:{}".format(filepath,cnt1))
print("{} 除了空行以外的文件行数:{}".format(filepath,cnt2))
print("{} 除了空行以外不重复的文件行数:{}\n".format(filepath,cnt3))

# Method 2: read the whole file into a list first. The counts are taken
# from filelines2 so this section reports the second method's own results.
print("方法二:文件读出到列表中")
cnt1,cnt2,cnt3=filelines2(filepath)
print("{} 文件总行数:{}".format(filepath,cnt1))
print("{} 除了空行以外的文件行数:{}".format(filepath,cnt2))
print("{} 除了空行以外不重复的文件行数:{}\n".format(filepath,cnt3))

调查问卷随机产生,写入文件。统计各评语出现的次数,生成字典。把统计结果再写入文件。


import random

def Qnaire(lst,n):
    """Generate *n* random questionnaire answers drawn from *lst*.

    Args:
        lst: Non-empty sequence of possible answers.
        n: Number of answers to draw; each draw is independent.

    Returns:
        The drawn answers joined by commas into one string (an empty
        string when *n* is 0).
    """
    # Draw from the parameter, not from a module-level list.
    return ','.join(str(random.choice(lst)) for _ in range(n))

def Qnairewrite(filename,text):
    """Write the string *text* to *filename*, replacing any previous content.

    The file is encoded as UTF-8.
    """
    out = open(filename, 'w', encoding='utf-8')
    try:
        out.write(text)
    finally:
        out.close()

def Qnaireread(filename):
    """Read the UTF-8 file *filename* and return its whole content as a string."""
    with open(filename, mode='r', encoding='utf-8') as src:
        content = src.read()
    return content

def cntComments(text):
    """Count how often each comment occurs in a comma-separated string.

    Args:
        text: Comments joined by commas, e.g. ``"a,b,a"``.

    Returns:
        A dict mapping each distinct comment to its number of occurrences,
        in order of first appearance. An empty *text* gives an empty dict.
    """
    dic = {}
    if not text:
        # "".split(',') would yield [''] and create a bogus empty comment.
        return dic
    for comment in text.split(','):
        # Start from 0 so the first occurrence is counted as 1.
        dic[comment] = dic.get(comment, 0) + 1
    return dic
    
def dictappend(filename,dicCnts):
    """Append the questionnaire statistics in *dicCnts* to *filename*.

    Writes a heading, then one line per comment with its count, then the
    comment that occurred most often. On a tie, the comment that comes
    first in the dict's iteration order is reported. When *dicCnts* is
    empty only the heading is written.

    Args:
        filename: Path of the UTF-8 text file to append to (created when
            missing).
        dicCnts: Dict mapping each comment (str) to its occurrence count.
    """
    with open(filename, 'a', encoding='utf-8') as f:
        f.write("\n根据统计,对伙食感觉:")
        for k, v in dicCnts.items():
            f.write("\n{}的学生:{}人".format(k, v))
        if dicCnts:
            f.write("\n调查结果中,出现次数最多的评语是:")
            # Rank by count; a bare max(dicCnts) would compare the keys.
            f.write(max(dicCnts, key=dicCnts.get))

# Result file, relative to the current working directory.
pathfile="TXT\\result1.txt"

# The possible answers a questionnaire can contain.
comments=['不满意','一般','满意','很满意']
result=Qnaire(comments,90)   # generate 90 random questionnaire answers into result
Qnairewrite(pathfile,result)     # write the answers in result to pathfile

result=Qnaireread(pathfile)   # read the content of pathfile back into result
print("\n{} 调查问卷(随机产生):\n\n{}".format(pathfile,result))

dicCnts=cntComments(result)  # count the occurrences of each comment in result into the dict dicCnts
print("\n调查问卷中各评语的统计结果:\n{}".format(dicCnts))

dictappend(pathfile,dicCnts) # append the per-comment counts in dicCnts to pathfile

result=Qnaireread(pathfile)   # read the content of pathfile back into result
print("\n{} 调查问卷及各评语的统计结果:\n\n{}".format(pathfile,result))


创建“通信录.csv”文件。


         "小亮":{"手机":"13913000002","QQ":"13913000002","微信":"13913000002"},
         "小刚":{"手机":"13913000003","QQ":"18191220003","微信":"gang1004"},
         "大刘":{"手机":"13914000001","QQ":"18191230001","微信":"liu666"},
         "大王":{"手机":"13914000002","QQ":"18191230002","微信":"jack_w"},
         "大张":{"手机":"13914000003","QQ":"18191230003","微信":"13914000003"}}
a=[]
for k,v in dictTXL.items():
    a.append(k)
    for c,d in v.items():
        a.append(d)
b=[]
for i in range(len(a)+4):
    if i==0:
        b.append(["姓名","手机","QQ","微信"])
    if i!=0 and i%4==0:
        b.append(a[i-4:i])
print(b)

import csv
# Create the address-book CSV from the rows in b. Mode 'w' replaces any
# file left by an earlier run; appending would repeat the header and every
# row each time the script runs. newline='' lets the csv module control
# line endings, and utf-8-sig puts a byte-order mark at the start of the file.
with open('通信录.csv','w',newline='',encoding='utf-8-sig') as csv_file:
    csv.writer(csv_file).writerows(b)

# Ask for a name and, when it is in the address book, print that contact's
# details as one "field value" pair per line. Unknown names print nothing.
n=input('姓名:')
if n in dictTXL:
    for e,f in dictTXL[n].items():
        print(e,f)

随机组谜语卷


import os
import csv
import random

# Open the riddle file and load the riddle collection into a dict.
def getDic(fileName):
    """Load a two-column riddle CSV into a dict.

    The first row is treated as a header and skipped. Rows with fewer than
    two columns (for example blank lines) are ignored.

    Args:
        fileName: Path of a UTF-8 encoded CSV file whose first column is
            the riddle and whose second column is the answer.

    Returns:
        A dict mapping each riddle to its answer; empty when the file has
        no data rows.
    """
    dic = {}
    # newline="" is what the csv module expects of files passed to it.
    with open(fileName, "r", encoding="utf-8", newline="") as file:
        reader = csv.reader(file)
        next(reader, None)      # skip the header; an empty file is tolerated
        for row in reader:
            if len(row) < 2:
                continue        # blank or incomplete row
            dic[row[0]] = row[1]  # riddle as key, answer as value
    return dic
# Build a list of n test papers from dic; each element is one paper (a list).
def creatPapers(dic,n,size=10):
    """Draw *n* random test papers from the riddles in *dic*.

    Args:
        dic: Dict whose keys are the riddles to choose from.
        n: Number of papers to generate.
        size: Number of riddles per paper (default 10). When *dic* holds
            fewer riddles, each paper contains all of them.

    Returns:
        A list of *n* lists; each inner list holds distinct riddles in
        random order.
    """
    items = list(dic.keys())
    count = max(0, min(size, len(items)))
    # random.sample returns a new list of distinct elements on every call.
    return [random.sample(items, count) for _ in range(n)]

# From lsPapers and lsAnswers, write n paper files and n answer files.
def createFiles(lsPapers,lsAnswers,n):
    """Write the first *n* papers and answer sheets to numbered text files.

    Paper ``i`` (counting from 1) goes to ``paper<i>.txt`` and its answers
    to ``answer<i>.txt`` in the current working directory. Every entry is
    written on its own line, UTF-8 encoded; existing files are overwritten.
    """
    def dump(prefix, sheets, index):
        # One file per sheet; the sheet is fetched only after the file is open.
        path = prefix + str(index + 1) + ".txt"
        with open(path, "w", encoding="utf-8") as out:
            out.writelines([entry + "\n" for entry in sheets[index]])

    for index in range(n):
        dump("paper", lsPapers, index)
        dump("answer", lsAnswers, index)
                
# Main program: generate n test papers and their answer sheets.
# All file names below are relative to the directory switched to here.
os.chdir("C:\\202042801117 林琦雯\\txt")
fn="儿童谜语集.csv"
n=5
riddles=getDic(fn)
papers=creatPapers(riddles,n)

answers=[]                          # answer lists matching the riddle lists in papers
for paper in papers:
         ans=[riddles[item] for item in paper]
         answers.append(ans)
createFiles(papers,answers,n)


统计《哈姆雷特》中出现频率最高的前10个单词。


def getText(text):
    """Normalise *text* for word counting.

    The text is lower-cased and each of the characters ``,.;?-:'`` is
    replaced by a single space.

    Args:
        text: The raw text.

    Returns:
        The normalised text.
    """
    # Replace all listed characters in one pass over the whole text.
    table = str.maketrans({ch: " " for ch in ",.;?-:\'"})
    return text.lower().translate(table)

# Count how often each word occurs.
# text is the text to analyse; topn is how many of the most frequent words to return.
def wordFreqs(text,topn):
    """Return the *topn* most frequent words of *text*.

    Words are the whitespace-separated tokens of *text*. A fixed set of
    common English words is left out of the result.

    Args:
        text: The text to analyse.
        topn: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count;
        words with equal counts keep their order of first appearance.
    """
    counts = {}
    for word in text.split():
        counts[word] = counts.get(word, 0) + 1
    excludes = {"the","and","to","of","a","be","it","is","not","but"}
    for word in excludes:
        # pop with a default: an excluded word may not occur in the text.
        counts.pop(word, None)
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    return items[:topn]

# Main program: read the play, compute the 10 most frequent words, save
# them to a file and print them.
try:
    # Only the read is guarded, so errors in the analysis are not
    # reported as a missing file.
    with open (r"C:\202042801117 林琦雯\txt\hamlet.txt",'r',encoding='utf-8') as file:
        text = file.read()
except IOError:
    print("文件不存在,请确认!\n")
else:
    text = getText(text)
    freqs = wordFreqs(text,10)
    try:
        with open(r"C:\202042801117 林琦雯\txt\hamlet_词频.txt",'w',encoding='utf-8') as fileFreq:
            items=[ word + "\t" + str(freq) + "\n" for word, freq in freqs]
            fileFreq.writelines(items)
    except IOError:
        print("写入文件出错")
    # Print the result whether or not the file could be written.
    for word,freq in freqs:
        print("{:<10}{:>}".format(word,freq))
        

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值