# -*- coding: utf-8 -*- """ @Time: 2018/1/14 @Author: songhao @微信公众号: zeropython @File: c3.py """ """ 方式一: 进入项目目录,运行 scrapy3 crawl spidername """ # scrapy3 startproject demo demo1 # cd demo1 # scrapy3 crawl spidername """ 方式二:这种方式可以在Pycharm中运行debug 如何单独运行 一个scrapy """ import sys import os from scrapy.cmdline import execute # 当前文件的父目录 file_path = os.path.dirname(__file__) print(file_path) # /Users/songhao/py/re_c """ sys.path是python的搜索模块的路径集,是一个list 可以在python 环境下使用sys.path.append(path)添加相关的路径,但在退出python环境后自己添加的路径就会自动消失了! 
""" sys.path.append(os.path.dirname(__file__)) # 获取文件的绝对路径 print(__file__) # /Users/songhao/py/re_c/c3.py print(sys.path) # ['/Users/songhao/py/re_c', '/Users/songhao/py', '/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python36.zip', '/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6', '/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/lib-dynload', '/Users/songhao/Library/Python/3.6/lib/python/site-packages', '/usr/local/lib/python3.6/site-packages', '/Applications/PyCharm.app/Contents/helpers/pycharm_matplotlib_backend', '/Users/songhao/py/re_c'] execute("scrapy crawl spidername".split(' ')) """ 方式三: 单独运行一个文件 scrapy3 genspider d_image macappstore.net """ # scrapy3 genspider d_image macappstore.net # scrapy3 runspider d_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
# -*- coding: utf-8 -*-
"""
Notes on three ways to run a Scrapy spider; this script implements method 2.

@Time: 2018/1/14
@Author: songhao
@WeChat official account: zeropython
@File: c3.py
"""
# Method 1: run the spider from inside the project directory.
#   scrapy3 startproject demo demo1
#   cd demo1
#   scrapy3 crawl spidername

# Method 2: start the crawl from a Python script, so that it can be run and
# debugged from PyCharm like any other script.
import os
import sys

from scrapy.cmdline import execute

# Directory that contains this file.  abspath() is applied first because
# __file__ may be a bare relative name (e.g. "c3.py"), in which case
# dirname() alone would return an empty string.
file_path = os.path.dirname(os.path.abspath(__file__))
print(file_path)
# Sample output: /Users/songhao/py/re_c

# sys.path is the list of directories Python searches for modules.
# A path added with sys.path.append() only lasts for the current
# interpreter session; it is gone once the interpreter exits.
sys.path.append(file_path)

# Path of this file as recorded by the interpreter (not necessarily absolute).
print(__file__)
# Sample output: /Users/songhao/py/re_c/c3.py

print(sys.path)
# Sample output:
# ['/Users/songhao/py/re_c', '/Users/songhao/py',
#  '/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python36.zip',
#  '/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6',
#  '/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/lib-dynload',
#  '/Users/songhao/Library/Python/3.6/lib/python/site-packages',
#  '/usr/local/lib/python3.6/site-packages',
#  '/Applications/PyCharm.app/Contents/helpers/pycharm_matplotlib_backend',
#  '/Users/songhao/py/re_c']

# Equivalent to typing "scrapy crawl spidername" on the command line;
# replace "spidername" with the name of the spider to run.
execute(["scrapy", "crawl", "spidername"])

# Method 3: run a single spider file on its own.
#   scrapy3 genspider d_image macappstore.net
#   scrapy3 runspider d_image.py
|