Project topology diagram:
Project steps:
Deploy the reverse proxy cluster
- Install nginx
Options: 1. install via yum
2. build and install from source
Here I chose the yum install.
1. yum install epel-release -y
2. yum install nginx -y
Verify the installation: rpm -qa |grep nginx
After the yum install, nginx lives under /etc/nginx.
Edit the configuration:
#Define a custom virtual host
First, in the main configuration file /etc/nginx/nginx.conf, add this directive inside the http block:
include conf.d/*.conf;
Then create a conf.d directory under the conf directory and add an xy.conf file:
[root@kafka01 conf.d]# cat xy.conf
server {
listen 8080; #listening port
server_name www.xy.com www.xy2.com; #domain names
access_log logs/xy.log main; #where the access log is stored
location / {
root /opt/html; #site document root
index a.html; #index file
}
}
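A quick way to verify the virtual host is to request it with the matching Host header. A minimal sketch in Python with the requests library (the proxy address 192.168.1.121 is a placeholder for your proxy machine, and it assumes /opt/html/a.html exists):
import requests
# ask for the index page of the www.xy.com vhost on port 8080
r = requests.get("http://192.168.1.121:8080/a.html",
                 headers={"Host": "www.xy.com"})
print(r.status_code, r.text)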
Use nginx's layer-7 reverse proxying (proxying by URL)
On the backend server cluster, install python3:
yum install python3 -y
pip3 install flask
Write a flask test program:
[root@node1 ~]# cat flask-test.py
from flask import Flask
app = Flask(__name__)
@app.route("/")
def index():
    return "this is flask index"
@app.route("/test")
def test():
    return "this is flask test"
app.run(host="0.0.0.0")
#Run the flask web app
[root@node1 ~]# nohup python3 flask-test.py &
[1] 2273
[root@node1 ~]# nohup: ignoring input and appending output to 'nohup.out'
[root@node1 ~]# ps -ef |grep python
root 1056 1 0 10:21 ? 00:00:01 /usr/bin/python2 -Es /usr/sbin/tuned -l -P
root 2273 1981 5 11:52 pts/1 00:00:00 python3 flask-test.py
root 2278 1981 0 11:52 pts/1 00:00:00 grep --color=auto python
#On the backend real servers, stop the firewall so they can reach the proxy machines.
#On the nginx proxy cluster, add:
location /api {
proxy_pass http://192.168.1.117:5000/; #with the trailing /, the URL carried after /api is what gets sent to the backend real server
}
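The effect of the trailing slash can be seen from a client: a request to /api has the /api prefix stripped before it reaches the backend, so flask serves its / view. A minimal check with Python requests (the proxy address is a placeholder):
import requests
# /api -> the backend receives /, served by the flask index() view
r = requests.get("http://192.168.1.121:8080/api",
                 headers={"Host": "www.xy.com"})
print(r.text)   # expect: this is flask index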
########Getting the real client IP into the backend server logs
location /api {
proxy_set_header Host $http_host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_pass http://192.168.1.117:5000/;
}
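On the flask side, the original client address then arrives in the X-Forwarded-For header. A small illustrative view for reading it (not part of flask-test.py, just a sketch):
from flask import Flask, request
app = Flask(__name__)
@app.route("/ip")
def client_ip():
    # X-Forwarded-For may carry a chain "client, proxy1, proxy2";
    # the first entry is the original client
    xff = request.headers.get("X-Forwarded-For", request.remote_addr)
    return xff.split(",")[0].strip()
app.run(host="0.0.0.0")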
#Test changes on the backend real server
Install gunicorn: pip3 install gunicorn
Comment out the last line, app.run(), in flask-test.py.
Start the service with gunicorn:
gunicorn flask-test:app -b "0.0.0.0:5000" --access-logfile='./access_sc.log' --access-logformat='%({x-forwarded-for}i)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"' &
Add load balancing:
In xy.conf, at the same level as the server block, add:
upstream flask_backend{
server 192.168.1.117:5000;
server 192.168.1.117:6000;
}
Change proxy_pass:
proxy_pass http://flask_backend/;
######Load balancing - health checks
nginx's built-in check (passive)
When a request happens to be proxied to a backend that is down, nginx forwards it once more, to a real server that can still provide service.
This check is passive: it cannot learn a backend's state in advance, and the possible extra forward costs efficiency.
nginx -- 主动健康检查 -- nginx_upstream_check_module
Download the module into /opt:
wget https://codeload.github.com/yaoweibin/nginx_upstream_check_module/zip/master
Install the unzip and patch commands: yum install unzip patch -y
Unpack it under /opt: unzip master
#In the nginx source directory, apply the patch
patch -p1 < /opt/nginx_upstream_check_module-master/check_1.20.1+.patch
#Reconfigure, adding the health check module
./configure --prefix=/usr/local/sc --user=sc --with-threads --with-http_ssl_module --with-http_v2_module --with-http_stub_status_module --with-stream --with-http_realip_module --with-http_auth_request_module --add-module=/opt/nginx_upstream_check_module-master/
#Rebuild and reinstall
make && make install
Update the configuration file xy.conf:
upstream flask_backend{
server 192.168.1.117:5000;
server 192.168.1.117:6000;
check interval=5000 rise=2 fall=5 timeout=1000 type=http;
check_http_send "HEAD / HTTP/1.0\r\n\r\n";
check_http_expect_alive http_2xx http_3xx;
}
Deploy keepalived for high availability.
#Install keepalived
yum install keepalived -y
#Configuration file: /etc/keepalived/keepalived.conf
#Back up the configuration file first
Master server:
Edit /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
# vrrp_strict #strictly follow the VRRP protocol
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state MASTER
interface ens33
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.100.250
}
}
Backup server:
Main configuration file: /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
# vrrp_strict #strictly follow the VRRP protocol
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state BACKUP #set as the backup
interface ens33 #which NIC the virtual IP binds to
virtual_router_id 51 #0-255; virtual router id, distinguishes different keepalived clusters on the same LAN
#every host in the same keepalived cluster uses the same router id
priority 50 #0-255 priority; the higher it is, the more likely this host holds the virtual IP
advert_int 1 #send a liveness advertisement every 1s
authentication { #authentication method
auth_type PASS
auth_pass 1111
}
virtual_ipaddress { #the virtual IP(s)
192.168.100.250
}
}
##########Start the service
systemctl start keepalived
#######Check the nginx service, preventing the service from going down due to split-brain
#Add a check script:
/opt/check_nginx.sh
[root@master keepalived]# cat /opt/check_nginx.sh
/usr/sbin/pidof nginx &>/dev/null
#Make it executable: chmod +x check_nginx.sh
#Update the master server configuration:
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
# vrrp_strict #strictly follow the VRRP protocol
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_script chk_http_port {
script "/opt/check_nginx.sh" #检测脚本位置
interval 2 #检测间隔
weight -60 #当检测脚本返回非0,将优先级-60
}
vrrp_instance VI_1 {
state MASTER
interface ens33
virtual_router_id 60
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
#reference the check script
track_script {
chk_http_port
}
virtual_ipaddress {
192.168.1.250
}
}
###########Dual VIPs, each node master for one and backup for the other, to make better use of resources.
#Master configuration for .250:
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
# vrrp_strict #strictly follow the VRRP protocol
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_script chk_http_port {
script "/opt/check_nginx.sh"
interval 2
weight -60
}
vrrp_instance VI_1 {
state MASTER
interface ens33
virtual_router_id 60
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
track_script {
chk_http_port
}
virtual_ipaddress {
192.168.1.250
}
}
vrrp_instance VI_2 {
state BACKUP #set as the backup
interface ens33 #which NIC the virtual IP binds to
virtual_router_id 61 #0-255; virtual router id, distinguishes different keepalived clusters on the same LAN
#every host in the same keepalived cluster uses the same router id
priority 50 #0-255 priority; the higher it is, the more likely this host holds the virtual IP
advert_int 1 #send a liveness advertisement every 1s
authentication { #authentication method
auth_type PASS
auth_pass 1111
}
virtual_ipaddress { #the virtual IP(s)
192.168.1.251
}
}
#Master configuration for .251:
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
# vrrp_strict #strictly follow the VRRP protocol
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state BACKUP #set as the backup
interface ens33 #which NIC the virtual IP binds to
virtual_router_id 60 #0-255; virtual router id, distinguishes different keepalived clusters on the same LAN
#every host in the same keepalived cluster uses the same router id
priority 50 #0-255 priority; the higher it is, the more likely this host holds the virtual IP
advert_int 1 #send a liveness advertisement every 1s
authentication { #authentication method
auth_type PASS
auth_pass 1111
}
virtual_ipaddress { #the virtual IP(s)
192.168.1.250
}
}
vrrp_instance VI_2 {
state MASTER #set as the master
interface ens33 #which NIC the virtual IP binds to
virtual_router_id 61 #0-255; virtual router id, distinguishes different keepalived clusters on the same LAN
#every host in the same keepalived cluster uses the same router id
priority 100 #0-255 priority; the higher it is, the more likely this host holds the virtual IP
advert_int 1 #send a liveness advertisement every 1s
authentication { #authentication method
auth_type PASS
auth_pass 1111
}
virtual_ipaddress { #the virtual IP(s)
192.168.1.251
}
}
Deploy the backend server cluster
#1. NFS setup: https://blog.youkuaiyun.com/sj349781478/article/details/79970739
Install: yum install nfs-utils
Start: systemctl start nfs
On the server side:
Create an html directory under /opt and write an index.html.
Edit the /etc/exports file:
/opt/html 192.168.1.121(ro,no_root_squash)
[root@web-3 html]# exportfs
/opt/html 192.168.1.121
systemctl restart nfs
On the client side:
mount -t nfs 192.168.1.117:/opt/flaskapp /opt/flaskapp
2. Start flask on the backend real servers
In the /opt/flaskapp directory, run:
gunicorn flask-test:app -b "0.0.0.0:5000" --access-logfile="./flask.log" &
3. Reverse proxy: update the nginx configuration file
upstream test {
server 192.168.1.117:5000;
server 192.168.1.121:5000;
}
server {
listen 80;
server_name www.sctest.com;
location / {
proxy_pass http://test;
}
}
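With two servers in the upstream, nginx defaults to round robin, so repeated requests should alternate between the backends. A quick check with Python requests (it assumes www.sctest.com resolves to the proxy, e.g. via an /etc/hosts entry):
import requests
# watch the responses alternate between the two backends
for _ in range(4):
    print(requests.get("http://www.sctest.com/").text)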
Deploy the zookeeper and kafka cluster
1. Install:
Install java: yum install java wget -y
Install kafka: wget https://mirrors.bfsu.edu.cn/apache/kafka/2.8.1/kafka_2.12-2.8.1.tgz
Unpack:
tar xf kafka_2.12-2.8.1.tgz
Use the zookeeper cluster configuration that ships with it.
Install zookeeper:
wget https://mirrors.bfsu.edu.cn/apache/zookeeper/zookeeper-3.6.3/apache-zookeeper-3.6.3-bin.tar.gz
2. Configure kafka
Edit config/server.properties:
broker.id=0
listeners=PLAINTEXT://nginx-kafka01:9092
zookeeper.connect=192.168.0.94:2181,192.168.0.95:2181,192.168.0.96:2181
3. Configure zookeeper
Go into /opt/apache-zookeeper-3.6.3-bin/conf
cp zoo_sample.cfg zoo.cfg
Edit zoo.cfg and add the following three lines:
server.1=192.168.0.94:3888:4888 #add each of the three VMs' hostname and ip
server.2=192.168.0.95:3888:4888
server.3=192.168.0.96:3888:4888
3888 and 4888 are both ports: one is used for data transfer, the other for liveness checking and leader election.
Create the /tmp/zookeeper directory and put a myid file in it; its content is the zookeeper id assigned to this machine.
For example, on the 192.168.0.94 machine:
echo 1 > /tmp/zookeeper/myid
Start zookeeper:
bin/zkServer.sh start
When bringing up zk and kafka, always start zk first, then kafka.
When shutting down, stop kafka first, then zk.
#Check status
[root@nginx-kafka03 apache-zookeeper-3.6.3-bin]# bin/zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /opt/apache-zookeeper-3.6.3-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: leader
Start kafka:
bin/kafka-server-start.sh -daemon config/server.properties
How zookeeper is used: --------- a coordination service for distributed applications -- it can provide centralized configuration management, a unified naming service, distributed locks, and cluster management.
Run
bin/zkCli.sh
[zk: localhost:2181(CONNECTED) 1] ls /
[admin, brokers, cluster, config, consumers, controller, controller_epoch, feature, isr_change_notification, latest_producer_id_block, log_dir_event_notification, sc, zookeeper]
[zk: localhost:2181(CONNECTED) 2] ls /brokers/ids
[1, 2, 3]
[zk: localhost:2181(CONNECTED) 3] create /sc/yy
Created /sc/yy
[zk: localhost:2181(CONNECTED) 4] ls /sc
[page, xx, yy]
[zk: localhost:2181(CONNECTED) 5] set /sc/yy 90
[zk: localhost:2181(CONNECTED) 6] get /sc/yy
90
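The same znode operations can be scripted from Python; a minimal sketch using the kazoo client library (pip3 install kazoo is an extra assumption, and the connection string should match your cluster):
from kazoo.client import KazooClient
zk = KazooClient(hosts="192.168.0.94:2181")
zk.start()
zk.ensure_path("/sc")              # make sure the parent znode exists
if not zk.exists("/sc/yy"):
    zk.create("/sc/yy", b"")       # same as: create /sc/yy
zk.set("/sc/yy", b"90")            # same as: set /sc/yy 90
value, stat = zk.get("/sc/yy")     # same as: get /sc/yy
print(value.decode())              # -> 90
zk.stop()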
#Test
Create a topic
bin/kafka-topics.sh --create --zookeeper 192.168.0.95:2181 --replication-factor 1 --partitions 1 --topic sc
List topics
bin/kafka-topics.sh --list --zookeeper 192.168.0.95:2181
Start a producer
[root@localhost kafka_2.12-2.8.0]# bin/kafka-console-producer.sh --broker-list 192.168.0.94:9092 --topic sc
>hello
>sanchuang tongle
>nihao
>world !!!!!!1
>
Start a consumer
[root@localhost kafka_2.12-2.8.0]# bin/kafka-console-consumer.sh --bootstrap-server 192.168.0.96:9092 --topic sc --from-beginning
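The same produce/consume round trip can also be done from Python; a minimal sketch with the kafka-python package (pip3 install kafka-python is an assumption; broker addresses as in this cluster):
from kafka import KafkaConsumer, KafkaProducer
producer = KafkaProducer(bootstrap_servers=["192.168.0.94:9092"])
producer.send("sc", b"hello from python")
producer.flush()
consumer = KafkaConsumer("sc",
                         bootstrap_servers=["192.168.0.96:9092"],
                         auto_offset_reset="earliest")
for msg in consumer:
    print(msg.value.decode())      # Ctrl-C to stop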
Connect to zk:
bin/zkCli.sh
[zk: localhost:2181(CONNECTED) 0] ls /
[admin, brokers, cluster, config, consumers, controller, controller_epoch, feature, isr_change_notification, latest_producer_id_block, log_dir_event_notification, zookeeper]
[zk: localhost:2181(CONNECTED) 1] ls /brokers
[ids, seqid, topics]
[zk: localhost:2181(CONNECTED) 2] ls /brokers/ids
[0, 1, 2]
[zk: localhost:2181(CONNECTED) 3] get /brokers/ids
null
[zk: localhost:2181(CONNECTED) 4] get /brokers/ids/0
{"listener_security_protocol_map":{"PLAINTEXT":"PLAINTEXT"},"endpoints":["PLAINTEXT://nginx-kafka02:9092"],"jmx_port":9999,"features":{},"host":"nginx-kafka02","timestamp":"1642300427923","port":9092,"version":5}
[zk: localhost:2181(CONNECTED) 5] ls /brokers/ids/0
[]
[zk: localhost:2181(CONNECTED) 6] get /brokers/ids/0
{"listener_security_protocol_map":{"PLAINTEXT":"PLAINTEXT"},"endpoints":["PLAINTEXT://nginx-kafka02:9092"],"jmx_port":9999,"features":{},"host":"nginx-kafka02","timestamp":"1642300427923","port":9092,"version":5}
zookeeper: a distributed, open-source configuration management service (etcd is a comparable tool)
Deploy filebeat on the backend servers
#Install
1. rpm --import https://packages.elastic.co/GPG-KEY-elasticsearch
2. Edit /etc/yum.repos.d/fb.repo:
[elastic-7.x]
name=Elastic repository for 7.x packages
baseurl=https://artifacts.elastic.co/packages/7.x/yum
gpgcheck=1
gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch
enabled=1
autorefresh=1
type=rpm-md
3. Install via yum
yum install filebeat -y
rpm -qa |grep filebeat #check whether filebeat is installed; rpm -qa lists all packages installed on the machine
rpm -ql filebeat #show filebeat's install paths and which files it involves
4. Enable start on boot
systemctl enable filebeat
#YAML format (the filebeat.inputs section below, shown as equivalent JSON):
{
"filebeat.inputs": [
{ "type": "log",
"enabled": true,
"paths": ["/var/log/nginx/sc_access"]
}
]
}
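The equivalence is easy to verify: feeding the YAML fragment to a parser yields exactly that structure. A small sketch with PyYAML (pip3 install pyyaml assumed):
import yaml
snippet = """
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      - /var/log/nginx/sc_access
"""
print(yaml.safe_load(snippet))
# {'filebeat.inputs': [{'type': 'log', 'enabled': True, 'paths': ['/var/log/nginx/sc_access']}]}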
#Configure
Edit the configuration file /etc/filebeat/filebeat.yml:
filebeat.inputs:
- type: log
# Change to true to enable this input configuration.
enabled: true
# Paths that should be crawled and fetched. Glob based paths.
paths:
- /var/log/nginx/sc_access.log
#==========------------------------------kafka-----------------------------------
output.kafka:
hosts: ["192.168.229.139:9092","192.168.229.140:9092"]
topic: nginxlog
keep_alive: 10s
#Create the topic nginxlog
bin/kafka-topics.sh --create --zookeeper 192.168.77.132:2181 --replication-factor 3 --partitions 1 --topic nginxlog
#Start the service:
systemctl start filebeat
[root@nginx-kafka01 opt]# ps -ef |grep filebeat
root 5537 1 0 15:32 ? 00:00:08 /usr/share/filebeat/bin/filebeat --environment systemd -c /etc/filebeat/filebeat.yml --path.home /usr/share/filebeat --path.config /etc/filebeat --path.data /var/lib/filebeat --path.logs /var/log/filebeat
Filebeat's data files:
[root@nginx-kafka01 filebeat]# pwd
/var/lib/filebeat/registry/filebeat
[root@nginx-kafka01 filebeat]# less log.json
Loading the data into the database
1. Requirements
We need the ip, time, and bandwidth fields from the nginx logs.
The ip field is resolved to its province and ISP.
Fields stored in the database: id, time, province, ISP, bandwidth.
#Steps
1. Create the tables
2. Write a python script that pulls the nginx logs from kafka
3. From the fetched logs, extract the ip, time, and bandwidth fields
4. Resolve the extracted ip to its province and ISP through a taobao API
url = "https://ip.taobao.com/outGetIpInfo?accessKey=alibaba-inc&ip=114.114.114.114"
5. Format the time field as "2021-10-12 12:00:00"
6. Insert into the database (a sketch of steps 2-6 follows the table definitions below)
#Create the tables
create table nginxlog (
id int primary key auto_increment,
dt datetime not null,
prov int ,
isp int,
bd float
) CHARSET=utf8;
create table prov_index(
id int primary key auto_increment,
prov_name varchar(256)
) charset=utf8;
create table isp_index(
id int primary key auto_increment,
isp_name varchar(256)
) charset=utf8;
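A hedged sketch of steps 2-6, using the kafka-python, requests, and pymysql packages. The nginx log layout (combined "main" format), the taobao response fields (data.region, data.isp), and the MySQL connection settings are all assumptions to adapt:
import json
from datetime import datetime
import pymysql
import requests
from kafka import KafkaConsumer

TAOBAO_URL = "https://ip.taobao.com/outGetIpInfo?accessKey=alibaba-inc&ip={}"

def resolve_ip(ip):
    # region/isp field names are assumptions about the taobao response
    data = requests.get(TAOBAO_URL.format(ip), timeout=5).json()["data"]
    return data["region"], data["isp"]

def index_id(cursor, table, col, name):
    # return the numeric id for a province/ISP name, creating it if missing
    cursor.execute("select id from {} where {}=%s".format(table, col), (name,))
    row = cursor.fetchone()
    if row:
        return row[0]
    cursor.execute("insert into {}({}) values (%s)".format(table, col), (name,))
    return cursor.lastrowid

consumer = KafkaConsumer("nginxlog",
                         bootstrap_servers=["192.168.229.139:9092"],
                         auto_offset_reset="earliest",
                         group_id="nginxlog-db")
db = pymysql.connect(host="127.0.0.1", user="root", password="xxx",
                     database="sc", charset="utf8")  # placeholder credentials
cursor = db.cursor()

for msg in consumer:
    line = json.loads(msg.value)["message"]   # filebeat wraps the raw log line
    fields = line.split()
    ip = fields[0]
    # assumes combined format: field 3 is [12/Oct/2021:12:00:00, field 9 is body bytes
    dt = datetime.strptime(fields[3].lstrip("["), "%d/%b/%Y:%H:%M:%S")
    bd = float(fields[9])
    prov, isp = resolve_ip(ip)
    cursor.execute("insert into nginxlog(dt, prov, isp, bd) values (%s,%s,%s,%s)",
                   (dt.strftime("%Y-%m-%d %H:%M:%S"),
                    index_id(cursor, "prov_index", "prov_name", prov),
                    index_id(cursor, "isp_index", "isp_name", isp),
                    bd))
    db.commit()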
##################################################
#Install celery
pip install celery
#the python client library for connecting to redis
pip install redis
#Set up celery (see flask_log/celery_app for reference)
#Configure celery
################config.py
########the celery configuration file
from celery.schedules import crontab
#broker (message middleware) address
BROKER_URL = "redis://192.168.77.132:6379/1"
#where results are stored
CELERY_RESULT_BACKEND = "redis://192.168.77.132:6379/2"
#tasks to import when celery starts; only imported tasks can be executed
CELERY_IMPORTS = {
'celery_tasks' #the module holding the tasks celery will run
}
#timezone
CELERY_TIMEZONE = "Asia/Shanghai"
#periodic task schedule
CELERYBEAT_SCHEDULE = {
'log-every-minute': {
'task' : 'celery_tasks.scheduled_task',
'schedule': crontab(minute='*/1')
}
}
############app.py: the file holding the core celery object
from celery import Celery
#instantiate the celery object; passing a name is enough
celery_app = Celery('celery_app')
celery_app.config_from_object('config')
#############celery_tasks.py: the file holding the tasks
from app import celery_app
@celery_app.task
def scheduled_task(*args, **kwargs):
    print("this is schedule task")
#Start a worker
[root@nginx-kafka01 flask_log]# celery -A app.celery_app worker --loglevel=INFO -n node1
#Start beat
[root@nginx-kafka01 flask_log]# celery -A app.celery_app beat --loglevel=INFO
#Celery async tasks
#Dispatching tasks
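Besides the beat schedule, any imported task can be dispatched asynchronously from other code; a minimal sketch (run it from the same flask_log directory so app.py and celery_tasks.py are importable):
from celery_tasks import scheduled_task
# push the task onto the broker; a running worker picks it up
result = scheduled_task.delay()
print(result.id)                 # task id recorded in the result backend
print(result.get(timeout=10))    # blocks until the worker finishes; None here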
#Start flower, the monitoring UI
celery -A celery_app flower --address=127.0.0.1 --port=5555
#Going to production
nginx + gunicorn
nginx + uwsgi
#Compared with uwsgi, gunicorn is simpler to configure and easier to use
#Generate the requirements.txt file
pip freeze > requirements.txt
#On the new host, install the dependencies
pip3 install -r requirements.txt -i "http://pypi.douban.com/simple" --trusted-host pypi.douban.com
#####Start flask
gunicorn -w 2 -b :8000 manage:app
#Update the front-end nginx cluster configuration
server {
listen 80 default_server;
server_name www.sc.com;
root /usr/share/nginx/html;
access_log /var/log/nginx/sc/access.log main;
location /v1 {
proxy_pass http://127.0.0.1:8000;
}
location / {
root /usr/share/nginx/html;
}
}