在 OpenStack 中实现节点高可用的方案时,需要探测物理节点是否宕机。程序中使用了 socket RAW 协议,而这必须具备 root 用户权限,但 OpenStack 的程序是以 nova 用户运行的,于是做了下面这些事情,希望能帮助大家。
1, 程序调用的错误
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task Traceback (most recent call last):
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/openstack/common/periodic_task.py", line 180, in run_periodic_tasks
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     task(self, context)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/manager.py", line 91, in health_check_host
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     if not icmp_ping.icmp_ping(node_resource[s]['host_ip']) and \
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 157, in icmp_ping
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     delay = do_one(dest_addr, CONF.timeout)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 136, in do_one
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     ping_socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/eventlet/greenio.py", line 116, in __init__
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     fd = _original_socket(family_or_realsock, *args, **kwargs)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib64/python2.6/socket.py", line 184, in __init__
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     _sock = _realsocket(family, type, proto)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task error: [Errno 1] Operation not permitted
|
2,icmp_ping 的实现。这是网上一位哥们写的 ICMP 实现(出处见下面的 URL),我拿过来做了一些修改,希望作者不要见怪
URL: https://mail.python.org/pipermail/tutor/2009-November/072706.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
# Copyright 2011 Ken Pepple
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
""" To detect physical nodes is active, according to the sock. Write the daemon ICMP protocol """
import os
import sys
# The stdlib socket module is deliberately not imported; the eventlet green
# socket imported below is used in its place.
#import socket
import struct
import select
import time
import traceback

from oslo.config import cfg
from nova.openstack.common import log as logging
from eventlet.green import socket

# Both options are consumed as numbers (a select() timeout and a loop count),
# so they are declared as integer options: a string option would hand back
# text as soon as the value is set in a configuration file.
icmp_opts = [
    cfg.IntOpt('timeout',
               default=3,
               help='Setting socket timeout'),
    cfg.IntOpt('count',
               default=9,
               help='Setting the IMCP PING'),
]

CONF = cfg.CONF
CONF.register_opts(icmp_opts)

LOG = logging.getLogger(__name__)

# From /usr/include/linux/icmp.h; your mileage may vary.
ICMP_ECHO_REQUEST = 8  # Seems to be the same on Solaris.
def checksum(source_string):
    """Return the 16-bit ones'-complement checksum of *source_string*.

    The input is a byte string (``str`` on Python 2, ``bytes`` on Python 3).
    Bytes are summed as 16-bit words with the low byte first, a trailing odd
    byte is added on its own, the carries are folded back into the low 16
    bits, and the complemented result is byte-swapped before it is returned.

    The original note on this routine said that testing suggests it gives
    the same answers as in_cksum in ping.c.
    """
    # bytearray yields integers on both Python 2 and Python 3, so no ord()
    # calls are needed and the builtin sum() is no longer shadowed.
    octets = bytearray(source_string)
    paired_length = (len(octets) // 2) * 2

    total = 0
    for index in range(0, paired_length, 2):
        word = octets[index + 1] * 256 + octets[index]
        total = (total + word) & 0xffffffff

    if paired_length < len(octets):
        # Odd length: the last byte has no partner.
        total = (total + octets[-1]) & 0xffffffff

    # Fold the carries above bit 15 back into the low 16 bits.
    total = (total >> 16) + (total & 0xffff)
    total = total + (total >> 16)

    answer = ~total & 0xffff
    # Swap the two bytes of the result.
    return (answer >> 8) | ((answer << 8) & 0xff00)
def receive_one_ping(ping_socket, ID, timeout):
    """Wait for the echo reply carrying *ID* and return its round-trip time.

    ping_socket -- socket to read replies from
    ID          -- 16-bit identifier expected in the reply's ICMP header
    timeout     -- maximum number of seconds to wait

    Returns the difference, in seconds, between the arrival time and the
    timestamp stored at the start of the reply payload, or None when no
    matching reply arrives before the timeout expires.
    """
    # The ICMP header is read from bytes 20..28 of the received packet,
    # i.e. the first 20 bytes are assumed to be an IPv4 header without
    # options.
    ip_header_len = 20
    payload_start = ip_header_len + 8
    stamp_len = struct.calcsize("d")

    time_left = timeout
    while True:
        # Wall-clock time is needed here. time.clock() measures CPU time on
        # Unix, which hardly advances while blocked in select(), so the
        # remaining budget would never shrink.
        wait_started = time.time()
        readable = select.select([ping_socket], [], [], time_left)[0]
        waited = time.time() - wait_started
        if not readable:
            # Timeout
            return None

        time_received = time.time()
        packet, _addr = ping_socket.recvfrom(1024)

        # Ignore packets too short to hold the header and the timestamp
        # instead of letting struct.unpack() raise.
        if len(packet) >= payload_start + stamp_len:
            _type, _code, _cksum, packet_id, _sequence = struct.unpack(
                "bbHHh", packet[ip_header_len:payload_start]
            )
            if packet_id == ID:
                time_sent = struct.unpack(
                    "d", packet[payload_start:payload_start + stamp_len]
                )[0]
                return time_received - time_sent

        time_left = time_left - waited
        if time_left <= 0:
            return None


def send_one_ping(ping_socket, dest_addr, ID):
    """Send one ICMP echo request to *dest_addr*.

    ping_socket -- socket to send the request on
    dest_addr   -- host name or IP address of the target
    ID          -- 16-bit identifier stored in the ICMP header

    The payload starts with the current wall-clock time packed as a double,
    which receive_one_ping() reads back to compute the delay.
    """
    dest_addr = socket.gethostbyname(dest_addr)

    # Header is type (8), code (8), checksum (16), id (16), sequence (16).
    # Build it first with a zero checksum so the real one can be computed.
    placeholder_header = struct.pack("bbHHh", ICMP_ECHO_REQUEST, 0, 0, ID, 1)

    stamp_len = struct.calcsize("d")
    padding = (192 - stamp_len) * b"Q"
    data = struct.pack("d", time.time()) + padding

    # Calculate the checksum on the data and the placeholder header.
    my_checksum = checksum(placeholder_header + data)

    header = struct.pack(
        "bbHHh", ICMP_ECHO_REQUEST, 0, socket.htons(my_checksum), ID, 1
    )
    packet = header + data
    # NOTE(review): the port number 1 is kept from the original code, which
    # was itself unsure about it; presumably it is ignored for raw ICMP.
    ping_socket.sendto(packet, (dest_addr, 1))
def do_one(dest_addr, timeout):
    """Ping *dest_addr* once.

    dest_addr -- host name or IP address of the target
    timeout   -- seconds to wait for the echo reply

    Returns either the delay (in seconds) or None on timeout.

    Raises socket.error when the raw ICMP socket cannot be created; an
    "operation not permitted" failure is logged with a hint first. Errors
    raised while sending or receiving propagate to the caller.
    """
    import errno

    icmp = socket.getprotobyname("icmp")
    try:
        ping_socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
    except socket.error as exc:
        if exc.errno == errno.EPERM:
            # Operation not permitted
            msg = str(exc.strerror) + (
                " - Note that ICMP messages can only be sent from processes"
                " running as root."
            )
            LOG.error(socket.error(msg))
        raise  # raise the original error

    # Close the socket on every path, including when sending or receiving
    # raises, so a failed ping does not leak a descriptor.
    try:
        my_ID = os.getpid() & 0xFFFF
        send_one_ping(ping_socket, dest_addr, my_ID)
        return receive_one_ping(ping_socket, my_ID, timeout)
    finally:
        ping_socket.close()
def icmp_ping(dest_addr):
    """Report whether *dest_addr* answers an ICMP echo request.

    Up to CONF.count requests are sent, each waiting CONF.timeout seconds.

    Returns True as soon as one reply is received. Returns False when every
    attempt times out or when the address cannot be resolved. Other socket
    errors raised by do_one() propagate to the caller.
    """
    # Coerce the option values so this also works if they arrive as strings.
    attempts = int(CONF.count)
    timeout = float(CONF.timeout)

    for _attempt in range(attempts):
        try:
            delay = do_one(dest_addr, timeout)
        except socket.gaierror:
            # Name resolution failed; retrying will not help.
            return False
        if delay is not None:
            # One reply is enough to show the host is up. A later lost
            # packet must not turn that into a "down" verdict.
            return True
    return False
|
3,我单独写了一个 OpenStack 的组件(nova-extend)来实现高可用,用它来调用 icmp_ping
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
"""Starter script for Nova Extend."""
import sys

from oslo.config import cfg

from nova import config
from nova import objects
from nova.openstack.common import log as logging
from nova import service
from nova import utils

CONF = cfg.CONF
CONF.import_opt('topic', 'nova.extend.api', group='extend')


def main():
    """Prepare the process for running the nova-extend service.

    Registers the nova objects, parses the command line, sets up logging
    under the "nova" name and applies nova's monkey patching.
    """
    # NOTE(review): the listing ends here and `service` is imported but not
    # used in the visible code; the service launch is presumably omitted.
    objects.register_all()
    config.parse_args(sys.argv)
    logging.setup("nova")
    utils.monkey_patch()
|
4, nova-extend 启动服务的时候会调用nova.extend.manager.ExtendManager
1
2
3
4
|
# Excerpt from the health check in nova/extend/manager.py: when the compute
# node's host IP does not answer the ICMP ping, log it and look up the
# instances on that host.
if not icmp_ping.icmp_ping(node_resource[s]['host_ip']) and \
        utils.is_false(CONF.extend_high_availability):
    # Pass the host as a logger argument so formatting is done lazily.
    LOG.info("The compute node [%s] is down ", s)
    instances_uuid = db.instances_uuid_by_host(context, s)
|
5, 程序就会报错,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task Traceback (most recent call last):
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/openstack/common/periodic_task.py", line 180, in run_periodic_tasks
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     task(self, context)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/manager.py", line 91, in health_check_host
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     if not icmp_ping.icmp_ping(node_resource[s]['host_ip']) and \
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 157, in icmp_ping
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     delay = do_one(dest_addr, CONF.timeout)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/nova/extend/icmp_ping.py", line 136, in do_one
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     ping_socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib/python2.6/site-packages/eventlet/greenio.py", line 116, in __init__
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     fd = _original_socket(family_or_realsock, *args, **kwargs)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task   File "/usr/lib64/python2.6/socket.py", line 184, in __init__
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task     _sock = _realsocket(family, type, proto)
2014-03-13 09:33:08.408 1916 TRACE nova.openstack.common.periodic_task error: [Errno 1] Operation not permitted
|
6,我直接用 python 调用是 OK 的,但是通过 OpenStack 调用依然报错。我原以为是协程限制导致的,其实不是。我是这样处理的:
1, cp /usr/bin/python /usr/bin/python-extend
2, chown root:root /usr/bin/python-extend
3, chmod ug+s /usr/bin/python-extend
4, 修改/usr/bin/nova-extend
#!/usr/bin/python-extend
5,重启服务就OK 了