网站自动化登录与爬虫-CSDN博客

某网站转刷器
#!/usr/bin/python
# -*- coding: utf-8 -*-

import requests
import time
import urllib2
import re
import BeautifulSoup
# You need to install the BeautifulSoup and requests modules manually from http://pypi.python.org/

def main():
    """Log in to the site, fetch the target page and report a matching link.

    One polling pass:
      1. GET the login page through a ``requests.Session`` so cookies persist.
      2. Read the hidden ``__VIEWSTATE`` and ``__EVENTVALIDATION`` form fields
         from the returned HTML and POST them back with the credentials.
      3. GET the target page and search its text for the link pattern and
         the point pattern, printing the result to stdout.

    Returns:
        None. All results are printed.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            a request exceeds the timeout.
        TypeError: if either hidden form field is missing from the login
            page (``soup.find`` then returns ``None``, which cannot be
            subscripted).
    """
    url = 'http://www.the_web_site.com/login.aspx'
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    # Seconds to wait on each request; without a timeout a stalled server
    # would block the caller's polling loop indefinitely.
    timeout = 30

    # Use a requests session to keep the cookies across the three requests.
    session = requests.Session()
    response = session.get(url, headers=headers, timeout=timeout)

    # Use BeautifulSoup to retrieve the hidden post data __VIEWSTATE and
    # __EVENTVALIDATION, which are sent back with the login form.
    soup = BeautifulSoup.BeautifulSoup(response.content)

    postdata = {
        '__VIEWSTATE': soup.find('input', id='__VIEWSTATE')['value'],
        '__EVENTVALIDATION': soup.find('input', id='__EVENTVALIDATION')['value'],
        'ctl00$ContentPlaceHolder1$UserName1': 'username',
        'ctl00$ContentPlaceHolder1$Password1': 'password',
        'ctl00$ContentPlaceHolder1$RememberMe1': 'on',
        'ctl00$ContentPlaceHolder1$LoginButton1.x': '46',
        'ctl00$ContentPlaceHolder1$LoginButton1.y': '0',
    }

    # Log in to the site.
    # NOTE: single-argument print(...) calls give the same output as a print
    # statement on Python 2 and are also valid on Python 3.
    response = session.post(url, data=postdata, headers=headers,
                            timeout=timeout)
    print(response)

    # Get the web page content.
    output = session.get('http://www.the_web_site.com/the_web_page.aspx',
                         timeout=timeout)
    print(output)
    resp_html = output.text

    # Check whether the page contains what we are looking for.
    # Literal dots are escaped so "." cannot match an arbitrary character.
    found_s = re.search(
        r'http://www\.the_web_site\.com/\d{7}\.aspx\?sjuser=.{14}', resp_html)
    # Unicode literal with a doubled backslash: valid on Python 2 and 3.
    found_p = re.search(u'分</span>：\\d{1,3}分', resp_html)
    print("found = %s" % (found_s,))

    if not found_s:
        print("\033[1;31;40mno found.\033[0m")
        return

    # Print the search result.
    print("\033[1;32;40mThere have something !\033[0m")
    print("URL= %s" % (found_s.group(0),))
    # The point pattern is searched independently of the link pattern, so it
    # may be absent even when the link is present; do not dereference None.
    if found_p:
        print("Point= %s" % (found_p.group(0),))
    else:
        print("Point= (not found)")


def search_file_write(find_url):
    """Append *find_url* as one line to ``record.txt``.

    The value is formatted with ``%s``, followed by two spaces and a
    newline, and appended to ``record.txt`` in the current working
    directory. The file is created if it does not exist.

    Args:
        find_url: the value to record; any object accepted by ``%s``.
    """
    # Local import keeps this function self-contained; io.open behaves the
    # same on Python 2 and 3 and lets us fix the encoding explicitly.
    import io

    spath = "record.txt"
    # An explicit UTF-8 text stream avoids the implicit ASCII encoding that
    # fails on non-ASCII text, and "with" closes the file even if write()
    # raises.
    with io.open(spath, 'a', encoding='utf-8') as f:
        f.write(u"%s  \n" % find_url)

if __name__ == "__main__":
    # Script entry point: run one login-and-check pass, pause, and repeat
    # until the process is interrupted.
    poll_interval = 30  # seconds to sleep between passes
    while 1:
        main()
        time.sleep(poll_interval)
转载于:https://blog.51cto.com/bxuan/1567203