import os
import urllib
import random
def save_url_content(url, folder_path):
    """Download ``url`` and save the response body to a new file in ``folder_path``.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.
        folder_path: Existing directory that receives the downloaded file.

    Returns:
        The path of the written file (``test_<n>.txt`` with ``n`` drawn from
        1..1000) on success. When ``url`` has an unsupported scheme or
        ``folder_path`` is not a directory, an error message string is
        returned instead and nothing is downloaded.
    """
    # A URL can only carry one scheme, so accept either prefix.
    if not url.startswith(('http://', 'https://')):
        print(u'error')
        return u'url not an http(s) url'
    if not os.path.isdir(folder_path):
        return u'folder_path not a folder'

    # urlopen lives in a different module on Python 3 than on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    print(content)

    # NOTE: only 1000 distinct names exist, so an earlier download with the
    # same random number is overwritten.
    random_name = 'test_%s.txt' % random.randint(1, 1000)
    filepath = os.path.join(folder_path, random_name)
    # Binary mode stores the body exactly as received (no newline
    # translation) and accepts the bytes object returned on Python 3.
    with open(filepath, 'wb') as file_handle:
        file_handle.write(content)
    return filepath
# Module-level demo: fetch http://www.baidu.com into F:\ and print whatever
# save_url_content returns (the saved file path or an error message).
print(save_url_content('http://www.baidu.com', 'F:\\'))
def get_url_list(url):
    """Count the ``<a href=`` occurrences in the page served at ``url``.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.

    Returns:
        The number of non-overlapping ``<a href=`` substrings in the response
        body. When ``url`` has an unsupported scheme, ``error`` is printed and
        0 is returned without fetching anything.
    """
    # A URL can only carry one scheme, so accept either prefix.
    if not url.startswith(('http://', 'https://')):
        print(u'error')
        return 0

    # urlopen lives in a different module on Python 3 than on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    print(content)

    # The body is a byte string, so search with a bytes literal; count()
    # equals the number of pieces produced by split() minus one.
    return content.count(b'<a href=')
# Module-level demo: print the value get_url_list returns for
# http://www.baidu.com.
print(get_url_list("http://www.baidu.com"))