依赖:python 2.7
mkdir /home/hadoop/crawler
一、安装python 2.7.12
wget https://www.python.org/ftp/python/2.7.12/Python-2.7.12.tgz
tar -zxvf Python-2.7.12.tgz
cd Python-2.7.12
./configure --prefix=/usr/local
make
make altinstall
python2.7 -V
从python 2.6升级到 2.7
mv /usr/bin/python /usr/bin/python2.6.6
ln -s /usr/local/bin/python2.7 /usr/bin/python
修改/usr/bin/yum
将 #!/usr/bin/python 改为 #!/usr/bin/python2.6.6
二、安装pip(root装或者带root权限账号)
wget https://pypi.python.org/packages/source/p/pip/pip-1.3.1.tar.gz --no-check-certificate
wget http://pypi.python.org/packages/source/s/setuptools/setuptools-0.6c11.tar.gz
tar zxf setuptools-0.6c11.tar.gz
cd setuptools-0.6c11/
python setup.py build
python setup.py install
cd ..
tar -xzvf pip-1.3.1.tar.gz
cd pip-1.3.1
python setup.py install
三、安装相关模块
pip install requests chardet web.py sqlalchemy gevent psutil
pip install requests chardet web.py sqlalchemy gevent psutil
pip install lxml
四、mysql配置
1、安装并启动mysql数据库
mysql -u root -p's9sfel1!fftep0(9' -h localhost
创建数据库:
create database proxy DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
CREATE USER 'proxy'@'%' IDENTIFIED BY 'proxy123$';
grant all privileges ON proxy.* TO 'proxy'@'%' WITH GRANT OPTION;
flush privileges;
2、安装pymysql
pip install pymysql
3、在config.py文件中配置DB_CONFIG。
DB_CONFIG={
'DB_CONNECT_TYPE':'sqlalchemy',
'DB_CONNECT_STRING':'mysql+pymysql://proxy:proxy123$@10.10.10.119/proxy?charset=utf8'
}
五、IPProxyPool安装
git clone https://github.com/qiyeboy/IPProxyPool
cd IPProxyPool
nohup python IPProxy.py >>ipproxy.log 2>&1 &
通过以下查看
http://192.168.130.140:8000/?types=0&count=5&country=国内
http://10.10.10.118:8000/?types=0&count=5&country=国内
---查看表,如果乱码,可临时调整以下字符集
mysql -u proxy -pproxy123$ -h centos-master
set character_set_client=utf8;
set character_set_connection=utf8;
set character_set_results=utf8;
set character_set_database=utf8;
set character_set_server=utf8;
set collation_connection=utf8_general_ci;
set collation_database=utf8_general_ci;
set collation_server=utf8_general_ci;
set collation_connection=utf8_general_ci;
show variables like 'character_set_%';
1.删除ip为218.201.98.196的代理:
http://192.168.130.140:8000/delete?ip=218.201.98.196
mkdir /home/hadoop/crawler
一、安装python 2.7.12
wget https://www.python.org/ftp/python/2.7.12/Python-2.7.12.tgz
tar -zxvf Python-2.7.12.tgz
cd Python-2.7.12
./configure --prefix=/usr/local
make
make altinstall
python2.7 -V
从python 2.6升级到 2.7
mv /usr/bin/python /usr/bin/python2.6.6
ln -s /usr/local/bin/python2.7 /usr/bin/python
修改/usr/bin/yum
将 #!/usr/bin/python 改为 #!/usr/bin/python2.6.6
二、安装pip(root装或者带root权限账号)
wget https://pypi.python.org/packages/source/p/pip/pip-1.3.1.tar.gz --no-check-certificate
wget http://pypi.python.org/packages/source/s/setuptools/setuptools-0.6c11.tar.gz
tar zxf setuptools-0.6c11.tar.gz
cd setuptools-0.6c11/
python setup.py build
python setup.py install
cd ..
tar -xzvf pip-1.3.1.tar.gz
cd pip-1.3.1
python setup.py install
三、安装相关模块
pip install requests chardet web.py sqlalchemy gevent psutil
pip install requests chardet web.py sqlalchemy gevent psutil
pip install lxml
四、mysql配置
1、安装并启动mysql数据库
mysql -u root -p's9sfel1!fftep0(9' -h localhost
创建数据库:
create database proxy DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
CREATE USER 'proxy'@'%' IDENTIFIED BY 'proxy123$';
grant all privileges ON proxy.* TO 'proxy'@'%' WITH GRANT OPTION;
flush privileges;
2、安装pymysql
pip install pymysql
3、在config.py文件中配置DB_CONFIG。
DB_CONFIG={
'DB_CONNECT_TYPE':'sqlalchemy',
'DB_CONNECT_STRING':'mysql+pymysql://proxy:proxy123$@10.10.10.119/proxy?charset=utf8'
}
五、IPProxyPool安装
git clone https://github.com/qiyeboy/IPProxyPool
cd IPProxyPool
nohup python IPProxy.py >>ipproxy.log 2>&1 &
通过以下查看
http://192.168.130.140:8000/?types=0&count=5&country=国内
http://10.10.10.118:8000/?types=0&count=5&country=国内
---查看表,如果乱码,可临时调整以下字符集
mysql -u proxy -pproxy123$ -h centos-master
set character_set_client=utf8;
set character_set_connection=utf8;
set character_set_results=utf8;
set character_set_database=utf8;
set character_set_server=utf8;
set collation_connection=utf8_general_ci;
set collation_database=utf8_general_ci;
set collation_server=utf8_general_ci;
set collation_connection=utf8_general_ci;
show variables like 'character_set_%';
1.删除ip为218.201.98.196的代理:
http://192.168.130.140:8000/delete?ip=218.201.98.196