vim templates.cfg
define host {
    name                    generic-printer     ; Name of this host template
    use                     generic-host        ; Inherit default values from the generic-host template
    check_period            24x7                ; Time period during which the host is checked
    check_interval          5                   ; Interval between regular host checks
    retry_interval          1                   ; Interval between re-checks while the state is not yet hard
    max_check_attempts      10                  ; Number of checks before the host state is considered hard
    check_command           check-host-alive    ; Command used to decide whether the host is up
    notification_period     workhours           ; Time period during which notifications may be sent
    ; Inline comments must start with ';'. Nagios only treats '#' as a comment marker
    ; at the beginning of a line, so a trailing '# ...' would become part of the value.
    notification_interval   30                  ; Re-notify the contacts / contact groups (here: admins) every 30 minutes
    notification_options    d,r                 ; Notify on DOWN (d) and recovery (r) events
    contact_groups          admins              ; Contact group that receives the notifications
    statusmap_image         printer.png         ; Image used for this host in the status map
    register                0                   ; Template only - do not register it as a real host
}
define service {
    name                            generic-service ; Template name that concrete services reference via 'use'
    register                        0               ; Template only - this is not a real service, so it is never registered

    ; --- Check scheduling ---
    check_period                    24x7            ; Checks may run at any time of the day
    max_check_attempts              3               ; Check up to 3 times before settling on the final (hard) state
    ; normal_check_interval         1               ; Commented out; same meaning as check_interval below
    check_interval                  1               ; How often the state of the service is checked
    retry_interval                  2               ; Re-check every two minutes until a hard state is determined

    ; --- Check behaviour ---
    active_checks_enabled           1               ; Run active checks for the service
    passive_checks_enabled          1               ; Accept passive check results for the service
    parallelize_check               1               ; Run active checks in parallel (turning this off can hurt performance badly)
    obsess_over_service             1               ; Obsess over this service (if necessary)
    check_freshness                 0               ; Freshness checking is off by default
    is_volatile                     0               ; The service is not volatile
    event_handler_enabled           1               ; Event handler is enabled for the service
    flap_detection_enabled          1               ; Flap detection is enabled
    process_perf_data               1               ; Performance data is processed

    ; --- State retention ---
    retain_status_information       1               ; Keep status information across program restarts
    retain_nonstatus_information    1               ; Keep non-status information across program restarts

    ; --- Notifications ---
    notifications_enabled           1               ; Notifications for the service are switched on
    contact_groups                  admins          ; Everyone in the 'admins' group is notified
    notification_options            w,u,c,r         ; Notify on warning, unknown, critical and recovery events
    notification_interval           60              ; Repeat the notification every hour while the problem persists
    notification_period             24x7            ; Notifications may go out at any time
}
define service {
    name                    local-service       ; Name of this service template
    use                     generic-service     ; Start from the defaults of the generic-service template
    register                0                   ; Template only - this is not a real service, so it is never registered
    max_check_attempts      4                   ; Check up to 4 times before settling on the final (hard) state
    ; normal_check_interval 1                   ; Commented out; same meaning as check_interval below
    check_interval          1                   ; How often the state of the service is checked
    retry_interval          1                   ; Re-check every minute until a hard state is determined
}
define service {
    name            es-service          ; Template for Elasticsearch services
    use             generic-service     ; Inherit default values from the generic-service template
    register        0                   ; Template only - this is not a real service, so it is never registered
    contact_groups  es-admin            ; Notifications go to the es-admin contact group instead of the inherited one
}
# define host 代表的是主机模版
# define service 代表的是监控(服务)模版,这里共有三项:generic-service 是基础模版;local-service 基于 generic-service 派生,用于监控 nagios 本机;es-service 同样基于 generic-service 派生,告警发送给 es-admin 联系人组
修改配置五:
# 先测试一下看看邮件是否可以发送成功
echo "how are you today" | mail -s "test" 1893130****@163.com
# 修改配置
more contacts.cfg | grep -Ev '^$|^#'
# 发送邮件配置
define contact {
    use             generic-contact         ; Pull default values from the generic-contact template
    contact_name    yuhailong               ; Short name of the user
    alias           Yu Hai Long             ; Full name of the user
    email           1893130****@163.com     ; Notification address - change this to your own e-mail address
    ; pager         <phone number>          ; Optional: fill in a mobile number here if SMS alerting is used
}
; 多个以此类推
;例如如果有第二个人
define contact {
    use             generic-contact         ; Pull default values from the generic-contact template
    contact_name    zhangsan                ; Short name of the user
    alias           Zhang San               ; Full name of the user
    email           zhangsan@163.com        ; Notification address - change this to your own e-mail address
    ; pager         <phone number>          ; Optional: fill in a mobile number here if SMS alerting is used
}
define contactgroup {
    contactgroup_name   admins                  ; Group name referenced by 'contact_groups'
    alias               Nagios Administrators   ; Descriptive name of the group
    members             yuhailong,zhangsan      ; Contacts in the group; separate several with commas
}
define contactgroup {
    contactgroup_name   es-admin                        ; Group name referenced by 'contact_groups'
    alias               Elastic Search Administrators   ; Descriptive name of the group
    members             yuhailong                       ; Contacts in the group
}
###启动服务
# 检测nagios配置文件是否正确
nagios -v /etc/nagios/nagios.cfg
# 保证如下两项为0即可
Total Warnings: 0
Total Errors: 0
systemctl restart nagios
systemctl enable nagios
访问:http://10.3.3.200:8080/nagios/
配置监控项
配置需要监控的主机
vim /etc/nagios/objects/hosts.cfg
# 配置的是需要监控的主机。
# use 使用linux-server 模版,模版在templates.cfg定义的 linux-server
# 定义名称,自定义
# alias 定义别名,自定义
# address 监控主机的IP地址
; Monitored hosts. Each one inherits from the linux-server host template and
; only sets its own name, alias and IP address.
define host {
    host_name   160             ; Name used to refer to this host
    alias       160             ; Display alias
    address     192.168.3.160   ; IP address of the monitored host
    use         linux-server    ; Host template to inherit from
}
define host {
    host_name   200             ; Name used to refer to this host
    alias       200             ; Display alias
    address     192.168.3.200   ; IP address of the monitored host
    use         linux-server    ; Host template to inherit from
}
define host {
    host_name   162             ; Name used to refer to this host
    alias       162             ; Display alias
    address     192.168.3.162   ; IP address of the monitored host
    use         linux-server    ; Host template to inherit from
}
配置需要监控的主机组
vim /etc/nagios/objects/hostsgroup.cfg
define hostgroup {
    hostgroup_name  nrpe-servers        ; Identifier of the host group
    alias           Servers with Nrpe   ; Descriptive name of the group
    members         *                   ; Comma-separated list of member hosts; '*' stands for all hosts
}
# 定义组
# hostgroup_name 组名称
# alias 别名
# members 组成员
# 注释
members *
# * 代表所有主机
members *,!160 ; 表示组成员为所有主机,但是不包括160
# 在主机组配置中,! 用作排除操作符,用于从集合中排除特定元素(与其他成员之间用逗号分隔)。
# 在监控服务(service)的 check_command 或参数中,! 用作分隔符,用于分隔命令的不同参数。
members 160
# 表示当前组里面只有160一个主机,
members 160,162
# 表示当前组里面有160和162两个主机,多个用逗号分隔
配置需要监控的主机的服务
vim /etc/nagios/objects/service.cfg
define service {
    use                     generic-service                     ; Service template to inherit from
    service_description     PING                                ; Name shown for this check
    hostgroup_name          nrpe-servers                        ; Applies to every host in this host group
    ; host_name             160                                 ; Commented-out alternative: a single host instead of the group
    check_command           check_ping!100.0,20%!500.0,60%      ; check_ping with two '!'-separated threshold arguments
    servicegroups           ping-service                        ; Service group this check is listed under
    notifications_enabled   1                                   ; 1 = send notifications for this check
}
define service {
    use                     generic-service                 ; Service template to inherit from
    service_description     Root Partition                  ; Name shown for this check
    hostgroup_name          nrpe-servers                    ; Applies to every host in this host group
    check_command           check_nrpe_arg!check_disk!/     ; check_nrpe_arg with arguments 'check_disk' and '/'
    servicegroups           partition-free-space-services   ; Service group this check is listed under
    notifications_enabled   1                               ; 1 = send notifications for this check
}
define service {
    use                     generic-service                 ; Service template to inherit from
    service_description     Var Partition                   ; Name shown for this check
    hostgroup_name          nrpe-servers                    ; Applies to every host in this host group
    check_command           check_nrpe_arg!check_disk!/var  ; check_nrpe_arg with arguments 'check_disk' and '/var'
    servicegroups           partition-free-space-services   ; Service group this check is listed under
    notifications_enabled   1                               ; 1 = send notifications for this check
}
define service {
    use                     generic-service                 ; Service template to inherit from
    hostgroup_name          nrpe-servers                    ; Applies to every host in this host group
    ; Capitalised like "Root Partition" / "Var Partition". Service descriptions are
    ; case-sensitive, so "data Partition" would be a different service from the
    ; host-specific "Data Partition" defined for host 200 in the local-service section.
    service_description     Data Partition
    check_command           check_nrpe_arg!check_disk!/data ; check_nrpe_arg with arguments 'check_disk' and '/data'
    notifications_enabled   1                               ; 1 = send notifications for this check
    servicegroups           partition-free-space-services   ; Service group this check is listed under
}
define service {
    use                     generic-service         ; Service template to inherit from
    service_description     Current Load            ; Name shown for this check
    hostgroup_name          nrpe-servers            ; Applies to every host in this host group
    check_command           check_nrpe!check_load   ; check_nrpe with 'check_load' as its argument
    notifications_enabled   0                       ; 0 = no notifications for this check
    servicegroups           checkload-service       ; Service group this check is listed under
}
define service {
    use                     generic-service         ; Service template to inherit from
    service_description     jumpserver 8089         ; Name shown for this check
    host_name               200                     ; Only checked on host 200
    check_command           check_http!8089! -u /   ; check_http with arguments '8089' and ' -u /'
    servicegroups           http-services           ; Service group this check is listed under
    notifications_enabled   1                       ; 1 = send notifications for this check
}
define service {
    use                     generic-service         ; Service template to inherit from
    service_description     zabbix 8080             ; Name shown for this check
    host_name               200                     ; Only checked on host 200
    ; For an external domain, pass it with -H (a domain name or an IP both work);
    ; see the commented-out www.okcis.cn example below.
    check_command           check_http!8080! -u /zabbix
    servicegroups           http-services           ; Service group this check is listed under
    notifications_enabled   1                       ; 1 = send notifications for this check
}
;define service {
; use generic-service ; Name of service template to use
; ;hostgroup_name http-www
; host_name 200
; service_description Http www.okcis.cn
; check_command check_http!9000!-H www.okcis.cn -u /php/echo.php ;-H 118.144.81.101
; notifications_enabled 1
; servicegroups http-services
;}
define service {
    use                     generic-service     ; Service template to inherit from
    ; 'host_name' is the documented directive and the one used by the other
    ; service definitions in this file; the original used the bare 'host'.
    host_name               200
    service_description     Mysql               ; Name shown for this check
    ; NOTE(review): the database password is stored in clear text in this file.
    ; Consider moving it into a $USERn$ macro in the resource file and restricting
    ; read access to the configuration.
    check_command           check_mysql!root!devops@123     ; check_mysql with arguments 'root' and the password
    notifications_enabled   1                   ; 1 = send notifications for this check
    servicegroups           mysql-service       ; Service group this check is listed under
}
define service {
    use                     generic-service     ; Service template to inherit from
    ; 'host_name' is the documented directive and the one used by the other
    ; service definitions in this file; the original used the bare 'host'.
    host_name               160
    service_description     Mysql               ; Name shown for this check
    ; NOTE(review): the database password is stored in clear text in this file.
    ; Consider moving it into a $USERn$ macro in the resource file and restricting
    ; read access to the configuration.
    check_command           check_mysql!root!dtzxroot@2002.DHW  ; check_mysql with arguments 'root' and the password
    notifications_enabled   1                   ; 1 = send notifications for this check
    servicegroups           mysql-service       ; Service group this check is listed under
}
define service {
    use                     generic-service     ; Service template to inherit from
    service_description     Redis               ; Name shown for this check
    host_name               160                 ; Only checked on host 160
    check_command           check_tcp!6379      ; check_tcp with '6379' as its argument
    servicegroups           redis-service       ; Service group this check is listed under
    notifications_enabled   1                   ; 1 = send notifications for this check
}
;-----------------------------------------------Local Service ---------------------------------------------------------
; 本机监控
define service {
    use                     local-service       ; Template used for checks on the Nagios server itself
    service_description     Current Load        ; Name shown for this check
    host_name               200                 ; Only checked on host 200
    check_command           check_local_load!50.0,40.0,30.0!80.0,60.0,50.0  ; check_local_load with two '!'-separated threshold arguments
    servicegroups           checkload-service   ; Service group this check is listed under
}
define service {
    use                     local-service                   ; Template used for checks on the Nagios server itself
    service_description     Data Partition                  ; Name shown for this check
    host_name               200                             ; Only checked on host 200
    check_command           check_local_disk!1%!1%!/data    ; check_local_disk with arguments '1%', '1%' and '/data'
    servicegroups           partition-free-space-services   ; Service group this check is listed under
    notifications_enabled   1                               ; 1 = send notifications for this check
}
define service {
    use                     local-service                   ; Template used for checks on the Nagios server itself
    service_description     Var Partition                   ; Name shown for this check
    host_name               200                             ; Only checked on host 200
    check_command           check_local_disk!20%!10%!/var   ; check_local_disk with arguments '20%', '10%' and '/var'
    servicegroups           partition-free-space-services   ; Service group this check is listed under
    notifications_enabled   1                               ; 1 = send notifications for this check
}
define service {
    use                     local-service                   ; Template used for checks on the Nagios server itself
    service_description     Root Partition                  ; Name shown for this check
    host_name               200                             ; Only checked on host 200
    check_command           check_local_disk!20%!10%!/      ; check_local_disk with arguments '20%', '10%' and '/'
    servicegroups           partition-free-space-services   ; Service group this check is listed under
    notifications_enabled   1                               ; 1 = send notifications for this check
}
# use local-service : 表示本机nagios使用,其余的都使用 generic-service
# 分号代表注释,host_name 可以写单个主机或多个主机,多个主机以逗号分隔,
# hostgroup_name : 代表属于这个主机组的服务器都适用这个监控项,hostgroup_name 是在 hostsgroup.cfg 中定义的
# service_description : nagios页面展示的名称,对应上要监控的服务名称
# check_command : 监控服务所用的命令。check_local_disk 是检查本地磁盘空间的命令,! 用来分隔传给它的参数:20%!10%!/ 依次是警告阈值、严重阈值和要检查的分区(/ 表示根分区)
# check_command :这条命令的意思是监控根分区,警告和严重阈值分别为 20% 和 10%,指的是剩余(可用)空间百分比:剩余空间低于 20% 时发出警告(WARNING),低于 10% 时为严重(CRITICAL)
# notifications_enabled 发生问题是否告警,1是true 0是false
# servicegroups 定义一个 service 组(服务组),方便界面查询。
配置需要监控的主机的服务组
more /etc/nagios/objects/servicegroup.cfg
; Service groups referenced by the 'servicegroups' directive of the service
; definitions; each one only carries a name and a display alias.
define servicegroup {
    servicegroup_name   ping-service
    alias               Ping Service
}
define servicegroup {
    servicegroup_name   http-services
    alias               http Service
}
define servicegroup {
    servicegroup_name   partition-free-space-services
    alias               Partition Free Space Services
}
define servicegroup {
    servicegroup_name   checkload-service
    alias               Current Load Service
}
define servicegroup {
    servicegroup_name   mysql-service
    alias               mysql Service
}
define servicegroup {
    servicegroup_name   redis-service
    alias               redis Service
}