nagios实现邮件、短信警报设置

最新推荐文章于 2025-05-30 13:41:20 发布

原创最新推荐文章于 2025-05-30 13:41:20 发布 · 7.3k 阅读

1 ·

CC 4.0 BY-SA版权

服务器监控专栏收录该内容

22 篇文章

订阅专栏

本文详细介绍了如何配置Nagios实现邮件和短信警报。首先，确保Nagios服务正常运行并能监控客户端，然后安装和配置sendmail邮件服务进行测试。接着，编辑contacts.cfg和commands.cfg文件以设置警报通知，包括添加管理员邮箱和创建联系人组。通过示例配置展示了如何为不同服务和主机指定联系人组，以便在出现故障时发送警报。此外，还讨论了如何调整Nagios的检测间隔时间。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1. 确保nagios服务能成功启动和访问。并且能够监控客户端。

1）安装sendmail或postfix等邮件服务（这里以sendmail为例）。

1 安装sendmail组件
首先要确保sendmail相关组件的完整安装，我们可以使用如下的命令来完成sendmail的安装：
# yum install -y sendmail*
然后重新启动sendmail服务：
# service sendmail restart
然后发送测试邮件，验证sendmail的可用性：
# echo "Hello World" | mail xxxxxxxx@139.com

# echo "Hello World" | mail xxxxxxxx@163.com

最好申请两个邮箱做测试，分别代表两个管理人员。

2) 配置。警报配置只需要在服务器端配置即可。需要编辑的文件有：

1./usr/local/nagios/etc/objects下的contacts.cfg

2./usr/local/nagios/etc/objects下的commands.cfg （经测试，这个文件不用编辑，还没研究透）

如果你不放心，可以加上以下这些代码，其他不用改动。

# Optional custom notification command, deliberately left commented out.
# command_line must stay on ONE physical line: the original listing was
# hard-wrapped, which split the $HOSTADDRESS$ macro and left the continuation
# lines uncommented.
# NOTE(review): the -s/-u/-xu/-xp options look like sendEmail-style flags rather
# than sendmail's own options - confirm which binary is installed at this path.
# NOTE(review): the SMTP password is passed in clear text on the command line;
# replace it with your own credentials and restrict read access to this file.
#define command{
#       command_name    notify-by-email
#       command_line    /usr/bin/printf "%b" "***** Nagios 2.9 *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /usr/local/bin/sendmail -f nagios@test.com -t $CONTACTEMAIL$ -s mail.test.com -u "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" -xu nagios -xp p#3isoda
#       }

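注释掉了还是能成功的：因为联系人实际使用的是notify-service-by-email和notify-host-by-email这两个命令（见下文templates.cfg中的generic-contact模板），而上面这个命令名为notify-by-email，没有任何联系人引用它，所以加不加都不影响发信。其余细节有待研究 ~~ 网上说法太不统一了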

2) 成功案例配置：

[root@bogon objects]# cat contacts.cfg
define contact{
        contact_name                    nagiosadmin             ; Short name of user
        use                             generic-contact         ; Inherit default values from the generic-contact template (defined in templates.cfg)
        alias                           Nagios Admin            ; Full name of user
        email                           nagios@localhost        ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
        }
# Default contact group; the host and service templates in templates.cfg name
# "admins" as their contact_groups, so objects that do not override
# contact_groups notify this group.
define contactgroup{
        contactgroup_name       admins
        alias                   Nagios Administrators
        members                 nagiosadmin
        }

#######################################################################################
#######################################################################################
##############   SYSTEM ADMINISTRATOR MEMBERS
#######################################################################################
#######################################################################################

# Administrator "li1": receives host and service alerts by email.
# The notification periods, options and commands below repeat the values of the
# generic-contact template; they are spelled out here for clarity.
define contact{
        contact_name                    li1
        use                             generic-contact
        alias                           li1
        email                           15218402325@139.com
        service_notification_period     24x7
        service_notification_options    w,u,c,r,f,s
        service_notification_commands   notify-service-by-email
        host_notification_period        24x7
        host_notification_options       d,u,r,f,s
        host_notification_commands      notify-host-by-email
        }

# Administrator "li2": receives host and service alerts by email.
# The notification periods, options and commands below repeat the values of the
# generic-contact template; they are spelled out here for clarity.
define contact{
        contact_name                    li2
        use                             generic-contact
        alias                           li2
        email                           15218402325@163.com
        service_notification_period     24x7
        service_notification_options    w,u,c,r,f,s
        service_notification_commands   notify-service-by-email
        host_notification_period        24x7
        host_notification_options       d,u,r,f,s
        host_notification_commands      notify-host-by-email
        }

#######################################################################################
#######################################################################################
##############   SYSTEM ADMINISTRATOR GROUP
#######################################################################################
#######################################################################################
# Contact group "system": bundles the administrators li1 and li2 so that hosts
# and services can reference them together via "contact_groups system".
define contactgroup{
        contactgroup_name       system
        alias                   system
        members                 li1,li2
        }
[root@bogon objects]#

以上意思是将两个管理员 li1、li2放到一个组system里。有意思了，可以按组来分配警报，例如有监控网络的，有监控系统的，他们都可以分别放在一个组里头。然后按组来监控某些服务或者主机。

相应的联系人和联系人组已经创建好了，接下来就是在被监控的服务中添加故障联系人了，以下面定义的监控主机和服务为例：

[root@bogon objects]# more linux.cfg
# Host 124.172.223.141 and its NRPE-based service checks.
# Each service sets "contact_groups system", overriding the contact group
# inherited from the generic-service template.
# NOTE(review): the host definition itself sets no contact_groups, so host
# notifications use whatever the linux-server template supplies; add
# "contact_groups system" here if host alerts should reach the same group.
define host{
        use                     linux-server
        host_name               124.172.223.141
        alias                   124.172.223.141
        address                 124.172.223.141
        }

define service{
        use                     generic-service
        host_name               124.172.223.141
        service_description     check-swap
        check_command           check_nrpe!check_swap
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.141
        service_description     check-load
        check_command           check_nrpe!check_load
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.141
        service_description     check-disk
        check_command           check_nrpe!check_df
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.141
        service_description     check-users
        check_command           check_nrpe!check_users
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.141
        service_description     total_procs             ; fixed typo: was "otal_procs"
        check_command           check_nrpe!check_total_procs
        notifications_enabled   1
        contact_groups          system
        }

# Host 124.172.223.142 and its NRPE-based service checks.
# Each service sets "contact_groups system", overriding the contact group
# inherited from the generic-service template.
# NOTE(review): the host definition itself sets no contact_groups, so host
# notifications use whatever the linux-server template supplies; add
# "contact_groups system" here if host alerts should reach the same group.
define host{
        use                     linux-server
        host_name               124.172.223.142
        alias                   124.172.223.142
        address                 124.172.223.142
        }

define service{
        use                     generic-service
        host_name               124.172.223.142
        service_description     check-swap
        check_command           check_nrpe!check_swap
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.142
        service_description     check-load
        check_command           check_nrpe!check_load
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.142
        service_description     check-disk
        check_command           check_nrpe!check_df
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.142
        service_description     check-users
        check_command           check_nrpe!check_users
        contact_groups          system
        }

define service{
        use                     generic-service
        host_name               124.172.223.142
        service_description     total_procs             ; fixed typo: was "otal_procs"
        check_command           check_nrpe!check_total_procs
        contact_groups          system
        notifications_enabled   1
        }
[root@bogon objects]#

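以上的意思是全部服务都由system组来接收警报。注意：上面的define host中没有写contact_groups，所以主机本身的警报仍按linux-server模板里的设置发给admins组；如果想让system组也收到主机警报，需要在define host中同样加上contact_groups system。呵呵有意思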

如上面配置所示，当被监控主机上的服务出现问题的时候，nagios就会查看contacts.cfg中定义的联系人组system中的联系人信息，读取各联系人的邮件地址，然后给system组的相关人员发送邮件。

3.如果要设置多久发一次邮件，或者更新检查服务器的时间间隔等，我们就要另外编辑文件了。一般要修改两个文件达到自己的目的。

文件一：nagios.cfg

修改以下参数（左边为默认值，右边为修改后的值。经测试，貌似间隔多久发一次警报与nagios.cfg配置无关，好像只是第一次开始检测服务异常所用的时间。那么间隔的话只与templates.cfg配置有关）：

max_service_check_spread=30              | max_service_check_spread=1
max_host_check_spread=30                   | max_host_check_spread=10
notification_timeout=30                           | notification_timeout=5
service_freshness_check_interval=60    | service_freshness_check_interval=10
host_freshness_check_interval=60         | host_freshness_check_interval=10

文件二：templates.cfg

下面的sdiff输出中，左边为修改后的新文件（templates.cfg），右边为默认文件（templates.cfg.defult）。

[root@bogon objects]# sdiff -s templates.cfg templates.cfg.defult
        check_interval                  1               ; Act |         check_interval                  5               ; Act
        max_check_attempts              1               ; Che |         max_check_attempts              10              ; Che
        notification_interval           1               ; Res |         notification_interval           120             ; Res
        check_interval          1               ; Actively ch |         check_interval          5               ; Actively ch
        max_check_attempts      1               ; Check each |         max_check_attempts      10              ; Check each
        notification_interval   1               ; Resend noti |         notification_interval   30              ; Resend noti
        check_interval          1               ; Actively ch |         check_interval          5               ; Actively ch
        max_check_attempts      1               ; Check each |         max_check_attempts      10              ; Check each
        notification_interval   1               ; Resend noti |         notification_interval   30              ; Resend noti
        check_interval          1               ; Switches ar |         check_interval          5               ; Switches ar
        max_check_attempts      1               ; Check each |         max_check_attempts      10              ; Check each
        notification_interval   1               ; Resend noti |         notification_interval   30              ; Resend noti
        normal_check_interval           1                     |         normal_check_interval           10
        retry_check_interval            1                     |         retry_check_interval            2
        notification_interval           1                     |         notification_interval           60
        normal_check_interval           1                     |         normal_check_interval           5
[root@bogon objects]#

以上templates.cfg修改完成。暂时先不逐一研究每个参数，用上面的新文件就可以按每分钟来发警报了。

以下是templates.cfg整个内容：

[root@bogon objects]# cat templates.cfg
###############################################################################
# TEMPLATES.CFG - SAMPLE OBJECT TEMPLATES
#
# Last Modified: 10-03-2007
#
# NOTES: This config file provides you with some example object definition
#        templates that are referred to by other host, service, contact, etc.
#        definitions in other config files.
#
#        You don't need to keep these definitions in a separate file from your
#        other object definitions. This has been done just to make things
#        easier to understand.
#
###############################################################################

###############################################################################
###############################################################################
#
# CONTACT TEMPLATES
#
###############################################################################
###############################################################################

# Generic contact template - NOT a real contact. Contacts that declare
# "use generic-contact" inherit the notification periods, options and commands
# set here.

define contact{
        name                            generic-contact         ; The name of this contact template
        service_notification_period     24x7                    ; Service notifications can be sent at any time
        host_notification_period        24x7                    ; Host notifications can be sent at any time
        service_notification_options    w,u,c,r,f,s             ; Send notifications for all service states, flapping events, and scheduled downtime events
        host_notification_options       d,u,r,f,s               ; Send notifications for all host states, flapping events, and scheduled downtime events
        service_notification_commands   notify-service-by-email ; Send service notifications via email
        host_notification_commands      notify-host-by-email    ; Send host notifications via email
        register                        0                       ; Do not register this definition - it is a template, not a real contact
        }

###############################################################################
###############################################################################
#
# HOST TEMPLATES
#
###############################################################################
###############################################################################

# Generic host template - NOT a real host. It is the base that the
# linux-server, windows-server, generic-printer and generic-switch templates
# inherit from via "use generic-host".

define host{
        name                            generic-host    ; The name of this host template
        notifications_enabled           1               ; Host notifications are enabled
        event_handler_enabled           1               ; Host event handler is enabled
        flap_detection_enabled          1               ; Flap detection is enabled
        failure_prediction_enabled      1               ; Failure prediction is enabled
        process_perf_data               1               ; Process performance data
        retain_status_information       1               ; Retain status information across program restarts
        retain_nonstatus_information    1               ; Retain non-status information across program restarts
        notification_period             24x7            ; Send host notifications at any time
        register                        0               ; Do not register this definition - it is a template, not a real host
        }

# Linux host template - NOT a real host. Check and notification intervals have
# been lowered to 1 so that problems are detected and re-notified every minute.

define host{
        name                            linux-server    ; The name of this host template
        use                             generic-host    ; This template inherits other values from the generic-host template
        check_period                    24x7            ; By default, Linux hosts are checked round the clock
        check_interval                  1               ; Actively check the host every minute (stock template: 5)
        retry_interval                  1               ; Schedule host check retries at 1 minute intervals
        max_check_attempts              1               ; Check each Linux host only once (stock template: 10)
        check_command                   check-host-alive ; Default command to check Linux hosts
        notification_period             workhours       ; Linux admins hate to be woken up, so we only notify during the day
                                                        ; Note that the notification_period variable is being overridden from
                                                        ; the value that is inherited from the generic-host template!
                                                        ; NOTE(review): host notifications are therefore limited to the workhours period even with notification_interval 1 - confirm this is intended
        notification_interval           1               ; Resend notifications every minute (stock template: 120)
        notification_options            d,u,r           ; Only send notifications for specific host states
        contact_groups                  admins          ; Notifications get sent to the admins by default
        register                        0               ; Do not register this definition - it is a template, not a real host
        }

# Windows host template - NOT a real host. Check and notification intervals
# have been lowered to 1 (minute).

define host{
        name                    windows-server ; The name of this host template
        use                     generic-host    ; Inherit default values from the generic-host template
        check_period            24x7            ; By default, Windows servers are monitored round the clock
        check_interval          1               ; Actively check the server every minute (stock template: 5)
        retry_interval          1               ; Schedule host check retries at 1 minute intervals
        max_check_attempts      1               ; Check each server only once (stock template: 10)
        check_command           check-host-alive        ; Default command to check if servers are "alive"
        notification_period     24x7            ; Send notification out at any time - day or night
        notification_interval   1               ; Resend notifications every minute (stock template: 30)
        notification_options    d,r             ; Only send notifications for specific host states
        contact_groups          admins          ; Notifications get sent to the admins by default
        hostgroups              windows-servers ; Host groups that Windows servers should be a member of
        register                0               ; Do not register this - it is just a template
        }

# Generic printer template that can be used for most monitored printers.
# Check and notification intervals have been lowered to 1 (minute).

define host{
        name                    generic-printer ; The name of this host template
        use                     generic-host    ; Inherit default values from the generic-host template
        check_period            24x7            ; By default, printers are monitored round the clock
        check_interval          1               ; Actively check the printer every minute (stock template: 5)
        retry_interval          1               ; Schedule host check retries at 1 minute intervals
        max_check_attempts      1               ; Check each printer only once (stock template: 10)
        check_command           check-host-alive        ; Default command to check if printers are "alive"
        notification_period     workhours               ; Printers are only used during the workday
        notification_interval   1               ; Resend notifications every minute (stock template: 30)
        notification_options    d,r             ; Only send notifications for specific host states
        contact_groups          admins          ; Notifications get sent to the admins by default
        register                0               ; Do not register this - it is just a template
        }

# Reusable template for network switches.
# Check and notification intervals have been lowered to 1 (minute).
define host{
        name                    generic-switch ; The name of this host template
        use                     generic-host    ; Inherit default values from the generic-host template
        check_period            24x7            ; By default, switches are monitored round the clock
        check_interval          1               ; Switches are checked every minute (stock template: 5)
        retry_interval          1               ; Schedule host check retries at 1 minute intervals
        max_check_attempts      1               ; Check each switch only once (stock template: 10)
        check_command           check-host-alive        ; Default command to check if switches are "alive"
        notification_period     24x7            ; Send notifications at any time
        notification_interval   1               ; Resend notifications every minute (stock template: 30)
        notification_options    d,r             ; Only send notifications for specific host states
        contact_groups          admins          ; Notifications get sent to the admins by default
        register                0               ; Do not register this - it is just a template
        }

###############################################################################
###############################################################################
#
# SERVICE TEMPLATES
#
###############################################################################
###############################################################################

# Generic service template - NOT a real service. Services that declare
# "use generic-service" inherit these defaults; check, retry and notification
# intervals have been lowered to 1 (minute).

define service{
        name                            generic-service         ; The 'name' of this service template
        active_checks_enabled           1                       ; Active service checks are enabled
        passive_checks_enabled          1                       ; Passive service checks are enabled/accepted
        parallelize_check               1                       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
        obsess_over_service             1                       ; We should obsess over this service (if necessary)
        check_freshness                 0                       ; Default is to NOT check service 'freshness'
        notifications_enabled           1                       ; Service notifications are enabled
        event_handler_enabled           1                       ; Service event handler is enabled
        flap_detection_enabled          1                       ; Flap detection is enabled
        failure_prediction_enabled      1                       ; Failure prediction is enabled
        process_perf_data               1                       ; Process performance data
        retain_status_information       1                       ; Retain status information across program restarts
        retain_nonstatus_information    1                       ; Retain non-status information across program restarts
        is_volatile                     0                       ; The service is not volatile
        check_period                    24x7                    ; The service can be checked at any time of the day
        max_check_attempts              3                       ; Re-check the service up to 3 times in order to determine its final (hard) state
        normal_check_interval           1                       ; Check the service every minute under normal conditions (stock template: 10)
        retry_check_interval            1                       ; Re-check the service every minute until a hard state can be determined (stock template: 2)
        contact_groups                  admins                  ; Notifications get sent out to everyone in the 'admins' group
        notification_options            w,u,c,r                 ; Send notifications about warning, unknown, critical, and recovery events
        notification_interval           1                       ; Re-notify about service problems every minute (stock template: 60)
        notification_period             24x7                    ; Notifications can be sent out at any time
         register                        0                      ; Do not register this definition - it is a template, not a real service
        notes_url               /nagios/cgi-bin/show.cgi?host=$HOSTNAME$&service=$SERVICEDESC$
        }

# Local service template - NOT a real service. Inherits from generic-service
# and overrides only the attempt count and the check/retry intervals.

define service{
        name                            local-service           ; The name of this service template
        use                             generic-service         ; Inherit default values from the generic-service definition
        max_check_attempts              4                       ; Re-check the service up to 4 times in order to determine its final (hard) state
        normal_check_interval           1                       ; Check the service every minute under normal conditions (stock template: 5)
        retry_check_interval            1                       ; Re-check the service every minute until a hard state can be determined
        register                        0                       ; Do not register this definition - it is a template, not a real service
        }

[root@bogon objects]#