Python 批处理：实现爬取网页静态图片、文件重命名、图片统一修改大小等功能

本文链接：https://blog.youkuaiyun.com/qq_26074263/article/details/77532024

本文介绍了如何使用Python进行批处理操作，包括爬取网页上的静态图片，对图片进行重命名，并统一修改图片大小。通过特定字符分割文档，批量处理增加文件后缀名，实现高效自动化操作。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

按照特定字符（此处选为字符 'j'）将文档分割成多个小文档：凡包含该字符的行视为分隔行，其前 6 个字符用作小文档的文件名，分隔行之后直到下一个分隔行之前的内容写入该小文档。

def main(src="dir/origin.txt", out_dir="dir", marker="j"):
    """Split a text file into several smaller files at every marker line.

    A line of *src* that contains *marker* starts a new section. The first
    six characters of that line (without a trailing line break) become the
    section's file name, ``<out_dir>/<name>.txt``, and every following line
    up to the next marker line is written into that file. Lines before the
    first marker line are discarded, and a section whose name repeats
    overwrites the file written earlier.

    Args:
        src: Path of the file to split.
        out_dir: Existing directory that receives the section files.
        marker: Substring that identifies a section-header line.
    """
    import os

    with open(src, "r") as f:
        lines = f.readlines()

    section = None  # file currently being written; None before the first header
    try:
        for line in lines:
            if marker in line:
                if section is not None:
                    section.close()
                # A header shorter than six characters would otherwise drag
                # its line break into the file name.
                name = line[0:6].rstrip("\r\n")
                section = open(os.path.join(out_dir, name + ".txt"), "w")
            elif section is not None:
                section.write(line)
    finally:
        if section is not None:
            section.close()


if __name__ == '__main__':
    main()

爬取网页静态图片

###coding=utf-8
##
###urllib模块提供了读取Web页面数据的接口
##import urllib.request
###re模块主要包含了正则表达式
##import re
###定义一个getHtml()函数
##def getHtml(url):
##    page = urllib.request.urlopen(url)  #urllib.urlopen()方法用于打开一个URL地址
##    html = page.read() #read()方法用于读取URL上的数据
##    return html
##
##def getImg(html):
##    reg = r'src="(.+?\.jpg)" pic_ext'    #正则表达式，得到图片地址
##    imgre = re.compile(reg)     #re.compile() 可以把正则表达式编译成一个正则表达式对象.
##    imglist = re.findall(imgre,html)      #re.findall() 方法读取html 中包含 imgre（正则表达式）的    数据
##    #把筛选的图片地址通过for循环遍历并保存到本地
##    #核心是urllib.urlretrieve()方法,直接将远程数据下载到本地，图片通过x依次递增命名
##    x = 0
##
##    for imgurl in imglist:
##        urllib.request.urlretrieve(imgurl,'/Users/c-ten/Desktop/worm/%s.jpg' % x)
##        x+=1
##
##
##html = getHtml("http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=masked%20faces&hs=2&xthttps=000000&fr=ala&ori_query=masked%20faces&ala=0&alatpl=sp&pos=0")
###print getImg(html)
# Created on 2017-4-17
#
# @author: Administrator
import urllib.request
import re

def gethtml(url, encoding="utf-8"):
    """Download *url* and return its body decoded as text.

    Args:
        url: Address to open with ``urllib.request.urlopen``.
        encoding: Codec used to decode the downloaded bytes.

    Returns:
        The response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # The with-block closes the response even when read() raises, so the
    # underlying connection is not leaked.
    with urllib.request.urlopen(url) as response:
        data = response.read()
    return data.decode(encoding)
def getimg(html, save_dir="/Users/c-ten/Desktop/worm/"):
    """Download every ``.jpg`` referenced in *html* into *save_dir*.

    An image address is whatever sits inside ``src="..."`` when the value
    ends in ``.jpg`` and is directly followed by `` pic_ext``. Each address
    is printed and then saved as ``0.jpg``, ``1.jpg``, ... in the order the
    addresses appear in *html*.

    Args:
        html: Page source to scan, as text.
        save_dir: Directory that receives the downloaded files; it is
            created if it does not exist yet.
    """
    import os

    # Raw string: in a normal literal "\." is an invalid escape sequence.
    img_pattern = re.compile(r'src="(.+?\.jpg)" pic_ext')

    os.makedirs(save_dir, exist_ok=True)
    for index, img_url in enumerate(img_pattern.findall(html)):
        print(img_url)
        target = os.path.join(save_dir, '{}.jpg'.format(index))
        urllib.request.urlretrieve(img_url, target)

# Driver: fetch one Baidu image-search result page, then download every
# matching .jpg it references. The two prints mark which stage is running.
print('gethtml')
search_url = "http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=%BF%DA%D5%D6%C3%B1%D7%D3%D5%DA%B5%B2%C8%CB%C1%B3%CD%BC%C6%AC&hs=2&xthttps=000000&fr=ala&ori_query=%E5%8F%A3%E7%BD%A9%E5%B8%BD%E5%AD%90%E9%81%AE%E6%8C%A1%E4%BA%BA%E8%84%B8%E5%9B%BE%E7%89%87&ala=0&alatpl=sp&pos=0"
html = gethtml(search_url)
print('getimg')
getimg(html)
# End of the static-image crawler script.

计算文档行数

def main(path="list.txt"):
    """Print and return the number of lines in a text file.

    Args:
        path: File whose lines are counted.

    Returns:
        The line count as an ``int``; 0 for an empty file.
    """
    # Iterating the file counts lines without holding them all in memory;
    # the with-block closes the file, so no explicit close() is needed.
    with open(path, "r") as f:
        count = sum(1 for _ in f)
    print(count)
    return count


if __name__ == '__main__':
    main()

批量增加后缀名

# -*- coding: cp936 -*-
import os
path = 'jpg_files_dir'

# Give every regular file in `path` whose name contains no dot a ".jpg"
# suffix; sub-directories and names that already contain a dot are skipped.
for entry in os.listdir(path):
    source = os.path.join(path, entry)
    if not os.path.isfile(source):
        continue
    if '.' in entry:
        continue
    os.rename(source, os.path.join(path, entry + '.jpg'))