Python Bugzilla

最新推荐文章于 2025-10-22 18:28:34 发布

原创最新推荐文章于 2025-10-22 18:28:34 发布 · 1k 阅读

0 ·

CC 4.0 BY-SA版权

Python 专栏收录该内容

8 篇文章

订阅专栏

本文使用 Python 2 的 urllib2 和 lxml 库，借助已登录会话的 Cookie 访问 Bugzilla，并尝试下载指定缺陷列表中各缺陷页面的附件：通过设置 User-Agent 和 Cookie 请求头来模拟浏览器行为，解析 HTML 获取附件链接，并将附件保存为图片文件。

# -*- coding: utf-8 -*-
import urllib
import urllib2
import re
import os
from lxml import etree
# Download the attachments of every bug listed on a Bugzilla search-result page.
#
# Flow: fetch the bug list -> follow each bug link -> collect the attachment
# links on the bug page -> save each attachment under ``local_dir``.
# Authentication is done by replaying an existing browser session cookie
# instead of posting the login form.

# Root of the Bugzilla instance; the hrefs scraped below are appended to it.
BASE_URL = 'http://bugzilla.bmsoft.cn/'
# Search-result page whose bugs are processed.
url = 'http://bugzilla.bmsoft.cn/buglist.cgi?bug_status=NEW&bug_status=UNCONFIRMED&bug_status=CONFIRMED&bug_status=IN_PROGRESS&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=TEST%20FAILED&component=2D%E7%AE%97%E6%B3%95&list_id=26923&product=OMM%E9%A1%B9%E7%9B%AE&query_format=advanced&resolution=---&resolution=FIXED&resolution=INVALID&resolution=WONTFIX&resolution=DUPLICATE&resolution=WORKSFORME'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'
# NOTE(review): this is a hard-coded session cookie copied from a browser; it
# is a credential and will presumably expire -- consider loading it from the
# environment or a config file instead of committing it.
cookies = 'Bugzilla_login=10; Bugzilla_logincookie=ufihUt9sKs; Bugzilla_login_request_cookie=sqaYv1gTz6'
# Local root directory; one sub-directory per attachment id is created in it.
local_dir = 'bugzilla/'

# Captures the numeric id of an attachment link such as "attachment.cgi?id=42".
_ATTACHMENT_ID = re.compile(r'attachment\.cgi\?id=(\d+)')


def _fetch(target):
    """Return the raw response body of ``target``.

    Every request carries the browser User-Agent and the session cookie, so
    the list page, the bug pages and the attachments are all fetched with the
    same session. The response is always closed, even if reading fails.

    Raises:
        urllib2.URLError: if the server cannot be reached or answers with an
            HTTP error status (urllib2.HTTPError is a subclass).
    """
    request = urllib2.Request(target)
    request.add_header('User-Agent', user_agent)
    request.add_header('Cookie', cookies)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def _save_attachment(href, index):
    """Download one attachment link and return the path it was saved to.

    Args:
        href: relative link taken from a bug page.
        index: position of the link on the bug page, used in the file name.

    Returns:
        The path of the written file, or None when ``href`` is not an
        attachment link.
    """
    match = _ATTACHMENT_ID.search(href)
    if match is None:
        return None
    target_dir = os.path.join(local_dir, match.group(1))
    # os.makedirs creates missing parents too but raises if the leaf already
    # exists, so guard it to keep re-runs working.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    # The '.jpg' extension is kept from the original script; it assumes the
    # attachments are images.
    save_path = os.path.join(target_dir, 'img' + str(index) + '.jpg')
    # Go through _fetch rather than urllib.urlretrieve: urlretrieve builds its
    # own request and would not send the session cookie.
    data = _fetch(BASE_URL + href)
    with open(save_path, 'wb') as fileobj:
        fileobj.write(data)
    return save_path


html = _fetch(url)
selector = etree.HTML(html)
# Each bug-id cell of the result table links to that bug's page.
urls = selector.xpath("//td[@class = 'first-child bz_id_column']/a/@href")

for bug_href in urls:
    newhtml = _fetch(BASE_URL + bug_href)
    attachmentselector = etree.HTML(newhtml)
    # Links found in the first cell of any table row; non-attachment links are
    # filtered out inside _save_attachment.
    attachmenturl = attachmentselector.xpath("//tr/td[1]/a/@href")
    for index, href in enumerate(attachmenturl):
        _save_attachment(href, index)

备注：能够正常登录 Bugzilla，但不能下载图片（遗留问题）。