Python编写Windows Service服务定时爬虫程序

Python编写Windows Service服务爬虫程序

本程序是一个 Windows 服务,定时到某网站抓取 iconfont 字体文件(.woff)并转换为 XML,供 C# 开发人员破解使用。

# _*_ coding:utf-8 _*_
import win32serviceutil 
import win32service 
import win32event
import winerror
import servicemanager
import sys
import urllib.request
import re
import os
import time
from fontTools.ttLib import TTFont

class fontService(win32serviceutil.ServiceFramework):
    """Windows service that periodically downloads a site's .woff font.

    Each cycle fetches a search page, extracts the first font file name
    referenced under ``statics/fonts/``, downloads that .woff file and dumps
    it to XML with fontTools. The pause between cycles is read from an XML
    config file on every iteration.
    """

    _svc_name_ = 'font_crawl'
    _svc_display_name_ = 'font_crawl'
    _svc_description_ = 'font_crawl'

    # Fallback pause between crawls (5 minutes) when the configured value is
    # missing or unusable.
    DEFAULT_INTERVAL_SECONDS = 300

    def __init__(self, args):
        """Create the stop event and mark the service loop as active."""
        win32serviceutil.ServiceFramework.__init__(self, args)
        self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
        self.run = True

    def SvcDoRun(self):
        """Run the crawl loop until a stop is requested."""
        while self.run:
            try:
                # Adjust the URL as needed.
                self.getWoof('https://www.****.com/search/?partno=LM358&qty=100')
            except Exception as exc:
                # Top-level boundary: one failed crawl (network error, bad
                # font file, ...) must not terminate the whole service.
                servicemanager.LogErrorMsg('font_crawl: crawl failed: %r' % (exc,))

            # Wait on the stop event instead of time.sleep() so that a stop
            # request interrupts the pause immediately.
            timeoutMs = int(self._getIntervalSeconds() * 1000)
            waited = win32event.WaitForSingleObject(self.hWaitStop, timeoutMs)
            if waited == win32event.WAIT_OBJECT_0:
                break

    def _getIntervalSeconds(self):
        """Return the configured pause in seconds as a positive finite float.

        Falls back to ``DEFAULT_INTERVAL_SECONDS`` when the config file cannot
        be read, has no ``<time>`` element, or holds a non-numeric,
        non-positive or non-finite value.
        """
        try:
            seconds = float(self.getXmlTime())
        except (OSError, TypeError, ValueError):
            # OSError: file unreadable; TypeError: getXmlTime() returned None;
            # ValueError: text is not a number (or the file failed to decode).
            return self.DEFAULT_INTERVAL_SECONDS
        if not (0 < seconds < float('inf')):
            return self.DEFAULT_INTERVAL_SECONDS
        return seconds

    def getXmlTime(self):
        """Return the raw text of the first ``<time>`` element, or None.

        The value is returned as the string found in the config file; callers
        must convert it to a number themselves.
        """
        file = r'D:\project\WWW\fontService\xml\fontService.xml'
        with open(file) as xml:
            content = xml.read()
        times = re.findall(r"<time>(.*?)</time>", content)
        if times:
            return times[0]
        return None

    def getWoof(self, url):
        """Download the first .woff font referenced by *url* and dump it to XML.

        Does nothing when the page contains no ``statics/fonts/<name>.woff``
        reference.
        """
        # Close the HTTP response deterministically.
        with urllib.request.urlopen(url) as response:
            page = response.read()
        html = page.decode('utf-8', 'ignore').replace(u'\xa9', u'')

        # Example of the text being matched:
        # "url('/statics/fonts/my_font15de21c670ae7c3f6f3f1f37029303c9.woff') format('woff')"
        # The dot is escaped and the name is restricted to file-name
        # characters, so the capture cannot span unrelated markup or carry
        # path separators into the local file path built below.
        woof_urls = re.findall(r"statics/fonts/([\w.-]+?)\.woff", html)
        if not woof_urls:
            return

        fontStem = woof_urls[0]
        # File name
        woof_name = fontStem + '.woff'
        # Remote file address
        woof_url = 'https://www.****.net/statics/fonts/' + woof_name
        # Local download directory. Raw string: in a normal literal the
        # "\f" in "\fontService" would be interpreted as a form feed.
        localPath = r'D:\project\WWW\fontService\ALLwoof'
        os.makedirs(localPath, exist_ok=True)
        distPath = os.path.join(localPath, woof_name)

        servicemanager.LogInfoMsg("downloading file: " + woof_url)
        urllib.request.urlretrieve(woof_url, distPath)

        # Convert the downloaded font to XML next to the .woff file.
        font = TTFont(distPath)
        font.saveXML(os.path.join(localPath, fontStem + '.xml'))

    def SvcStop(self):
        """Handle a stop request: report STOP_PENDING and wake the run loop."""
        self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
        # Clear the flag before signalling so the loop cannot start another
        # cycle after it wakes up.
        self.run = False
        win32event.SetEvent(self.hWaitStop)


if __name__ == '__main__':
    # With no command-line arguments the script tries to host the service
    # itself; with arguments (install/start/stop/remove/debug ...) the
    # standard pywin32 command-line handler is used.
    if len(sys.argv) == 1:
        try:
            evtsrc_dll = os.path.abspath(servicemanager.__file__)
            servicemanager.PrepareToHostSingle(fontService)
            servicemanager.Initialize('fontService', evtsrc_dll)
            servicemanager.StartServiceCtrlDispatcher()
        except win32service.error as details:
            # Compare the numeric error code, not the exception object
            # itself (which never equals an int).
            if details.winerror == winerror.ERROR_FAILED_SERVICE_CONTROLLER_CONNECT:
                win32serviceutil.usage()
            else:
                # Any other failure is unexpected; do not hide it.
                raise
    else:
        win32serviceutil.HandleCommandLine(fontService)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值