Python Bugzilla

该博客使用 Python 2 的 urllib2 和 lxml 库,借助浏览器中已登录会话的 Cookie 访问 Bugzilla,并尝试下载指定缺陷列表页面中各缺陷的附件。脚本通过设置请求头(User-Agent、Cookie)来模拟浏览器行为,用 XPath 解析 HTML 获取附件链接,并将附件保存为图片文件。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# -*- coding: utf-8 -*-
import urllib
import urllib2
import re
import os
from lxml import etree
# Advanced-search bug list to scrape; the status, component, product and
# resolution filters are all encoded in the query string.
url = 'http://bugzilla.bmsoft.cn/buglist.cgi?bug_status=NEW&bug_status=UNCONFIRMED&bug_status=CONFIRMED&bug_status=IN_PROGRESS&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=TEST%20FAILED&component=2D%E7%AE%97%E6%B3%95&list_id=26923&product=OMM%E9%A1%B9%E7%9B%AE&query_format=advanced&resolution=---&resolution=FIXED&resolution=INVALID&resolution=WONTFIX&resolution=DUPLICATE&resolution=WORKSFORME'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'
# Session cookie of an already logged-in browser. It is sent with every
# request instead of posting a username/password form (that login path was
# disabled in the original script).
# NOTE(review): the value is hard-coded and presumably expires with the
# browser session -- refresh it before running.
cookies = 'Bugzilla_login=10; Bugzilla_logincookie=ufihUt9sKs; Bugzilla_login_request_cookie=sqaYv1gTz6'

# Request the bug list with browser-like headers plus the session cookie.
req = urllib2.Request(url)
req.add_header('User-Agent', user_agent)
req.add_header('Cookie', cookies)
response = urllib2.urlopen(req)
try:
    html = response.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()

# Debugging aid: to inspect the raw page, write `html` to the file
# 'bugzilla/index1.htm' opened in 'wb' mode.

# Collect the href of the link inside every bug-id cell of the result table.
selector = etree.HTML(html)
urls = selector.xpath("//td[@class = 'first-child bz_id_column']/a/@href")

# Visit every bug found on the list page and save each of its attachments
# under bugzilla/<attachment id>/.
base_url = 'http://bugzilla.bmsoft.cn/'
for bug_href in urls:
    newurl = base_url + bug_href
    attachmentreq = urllib2.Request(newurl)
    attachmentreq.add_header('User-Agent', user_agent)
    attachmentreq.add_header('Cookie', cookies)
    newresponse = urllib2.urlopen(attachmentreq)
    try:
        newhtml = newresponse.read()
    finally:
        newresponse.close()

    # Links found in the first cell of any table row on the bug page.
    attachmentselector = etree.HTML(newhtml)
    attachmenturl = attachmentselector.xpath("//tr/td[1]/a/@href")

    for i, href in enumerate(attachmenturl):
        # Only links of the form attachment.cgi?id=... point at attachments.
        if 'attachment.cgi?id' not in href:
            continue

        # Keep just the id value: drop everything up to the first '=' and
        # any further '&key=value' parameters that follow it.
        attachment_id = href.split('=', 1)[1].split('&', 1)[0]
        target_dir = os.path.join('bugzilla', attachment_id)
        # os.makedirs also creates the parent 'bugzilla' directory; the
        # isdir guard keeps re-runs from failing on an existing directory.
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)

        # Download through urllib2 so the session cookie is sent; a plain
        # urllib.urlretrieve call carries no Cookie header and would be
        # served as an anonymous request.
        finalurl = base_url + href
        filereq = urllib2.Request(finalurl)
        filereq.add_header('User-Agent', user_agent)
        filereq.add_header('Cookie', cookies)
        fileresponse = urllib2.urlopen(filereq)
        try:
            payload = fileresponse.read()
        finally:
            fileresponse.close()

        # Same 'img<index>.jpg' naming as before, but written inside the
        # per-attachment directory so files from different bugs no longer
        # overwrite each other.
        # NOTE(review): the .jpg extension is assumed; attachments of other
        # types are saved under the same suffix.
        save_path = os.path.join(target_dir, 'img' + str(i) + '.jpg')
        with open(save_path, 'wb') as out:
            out.write(payload)

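备注:能够正常访问 Bugzilla(借助已登录会话的 Cookie),但图片下载失败(遗留问题)。从代码看,原因包括:os.makedir 应为 os.makedirs;回调函数 Schedule 未定义;urllib.urlretrieve 下载时没有携带登录 Cookie。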
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值