exadata计算节点在安装GI时无法发现griddisk

本文详细介绍了Oracle Exadata中计算节点如何通过配置文件发现存储节点的磁盘资源,包括ipaddress参数设置、bond配置差异、cellinit.ora与cellip.ora文件的作用,以及使用kfod.bin工具验证磁盘发现过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

计算节点要想发现cellnode的磁盘,需要在计算节点配置两个文件,所有的计算节点都需要配置
计算节点上的配置文件的子网掩码要和存储节点上的配置文件的子网掩码保持一致

[root@slcz01db03 network-config]# pwd
/etc/oracle/cell/network-config
cellinit.ora里面记录的是计算节点的ip地址,即计算节点上和cellnode连接的infiniband接口的ip地址,共4个或者8个
参数名不要都写成ipaddress1,要分别写为ipaddress1、ipaddress2、ipaddress3、ipaddress4


错误的写法:
ipaddress1=192.168.64.190/21
ipaddress1=192.168.64.191/21
ipaddress1=192.168.64.192/21
ipaddress1=192.168.64.193/21

[root@slcz01db03 network-config]# cat cellinit.ora
ipaddress1=192.168.64.190/21
ipaddress2=192.168.64.191/21
ipaddress3=192.168.64.192/21
ipaddress4=192.168.64.193/21
#ipaddress2=192.168.64.101/21
#ipaddress3=192.168.64.102/21
#ipaddress4=192.168.64.103/21
_ipcdat_device_list="bondib0"
#_ipcdat_device_list="bondib0,bondib1,bondib2,bondib3"
#_cell_disable_ipcdat_on_client=true
#_cell_enable_ipcdat_on_client=true
cellip.ora记录的是cellnode的ip地址,此处有两个cell node
[root@slcz01db03 network-config]# cat cellip.ora
cell="192.168.64.182"
cell="192.168.64.183"
#cell="192.168.64.184"

[root@scaqai06adm08 network-config]# cat cellinit.ora
_cell_enable_ipcdat_srq=false
#ipaddress2=192.168.41.222/20
_cell_disable_ipcdat_on_client=true
#ipaddress1=192.168.41.221/20
ipaddress1=192.168.41.221/21
ipaddress2=192.168.41.222/21
[root@scaqai06adm08 network-config]# cat cellip.ora
cell="192.168.41.245;192.168.41.246"
cell="192.168.41.247;192.168.41.248"
cell="192.168.41.249;192.168.41.250"

对于computenode上的ib卡使用或者不使用bond的情况

使用bond的情况

[root@slcm05adm01 network-config]# ifconfig -a | grep ib[0-9] -A 1
bondib0: flags=5187<UP,BROADCAST,RUNNING,MASTER,MULTICAST>  mtu 65520
        inet 192.168.0.1  netmask 255.255.240.0  broadcast 192.168.15.255
--
ib0: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 65520
        infiniband 80:00:02:08:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
--
ib1: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 65520
        infiniband 80:00:02:09:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)

[root@slcm05adm01 network-config]# pwd
/etc/oracle/cell/network-config
[root@slcm05adm01 network-config]# cat cellinit.ora
ipaddress1=192.168.0.1/20

#####################################################
不使用bond的情况
[root@scaqai06adm07 network-config]# ifconfig -a | grep ib[0-9] -A 1

ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 65520
        inet 192.168.1.57  netmask 255.255.240.0  broadcast 192.168.15.255
--
ib1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 65520
        inet 192.168.1.58  netmask 255.255.240.0  broadcast 192.168.15.255


[root@scaqai06adm07 cell]# cd network-config/
[root@scaqai06adm07 network-config]# ls
cellinit.ora  cellip.ora
[root@scaqai06adm07 network-config]# cat cellinit.ora
ipaddress1=192.168.1.57/20
ipaddress2=192.168.1.58/20

对于cellnode上使用bond和不使用bond的情况下computenode的cellip.ora


使用bond的情况
[root@slcm05celadm01 ~]# ifconfig -a | grep ib[0-9] -A 1
bondib0: flags=5187<UP,BROADCAST,RUNNING,MASTER,MULTICAST>  mtu 1500
        inet 192.168.0.17  netmask 255.255.240.0  broadcast 192.168.15.255
--
ib0: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 1500
        infiniband 80:00:02:08:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
--
ib1: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 1500
        infiniband 80:00:02:09:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)


[root@slcm05adm01 network-config]# pwd
/etc/oracle/cell/network-config
[root@slcm05adm01 network-config]# cat cellip.ora
cell="192.168.0.17"
cell="192.168.0.19"
cell="192.168.0.21"
cell="192.168.0.23"
cell="192.168.0.25"
cell="192.168.0.27"
cell="192.168.0.29"


######################################
不使用bond的情况
[root@scaqai06celadm14 ~]# ifconfig -a | grep ib[0-9] -A 1
ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 192.168.1.87  netmask 255.255.240.0  broadcast 192.168.15.255
--
ib1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 192.168.1.88  netmask 255.255.240.0  broadcast 192.168.15.255
[root@scaqai06adm08 network-config]# cat cellip.ora
cell="192.168.1.83;192.168.1.84"
cell="192.168.1.85;192.168.1.86"
cell="192.168.1.87;192.168.1.88"
[root@scaqai06adm08 network-config]# pwd
/etc/oracle/cell/network-config

对于cellnode上使用bond和不使用bond的情况下cellnode的cellinit.ora

不使用bond的情况
[root@scaqai06celadm14 ~]# ifconfig -a | grep ib[0-9] -A 1
ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 192.168.1.87  netmask 255.255.240.0  broadcast 192.168.15.255
--
ib1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 192.168.1.88  netmask 255.255.240.0  broadcast 192.168.15.255
        
[root@scaqai06celadm14 ~]# imageinfo | grep -i '^Active image version'
Active image version: 21.2.11.0.0.220414.1

要选对目录,image应该就是当前active的image
[root@scaqai06celadm14 config]# pwd
/opt/oracle/cell/cellofl-21.2.11.0.0_LINUX.X64_220414.1/cellsrv/deploy/config
[root@scaqai06celadm14 config]# cat cellinit.ora
#CELL Initialization Parameters
_cell_ramcache_mode=On
ipaddress2=192.168.1.88/20
ipaddress1=192.168.1.87/20




使用bond的情况
#############################################################
[root@slcm05celadm01 config]# ifconfig -a | grep -i ib[0-9] -A 1
bondib0: flags=5187<UP,BROADCAST,RUNNING,MASTER,MULTICAST>  mtu 1500
        inet 192.168.0.17  netmask 255.255.240.0  broadcast 192.168.15.255
--
ib0: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 1500
        infiniband 80:00:02:08:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
--
ib1: flags=6211<UP,BROADCAST,RUNNING,SLAVE,MULTICAST>  mtu 1500
        infiniband 80:00:02:09:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)


[root@slcm05celadm01 cell]# imageinfo | grep -i '^Active image version'
Active image version: 22.1.90.0.0.220331
注意配置文件的路径一定要找对,一定是当前active的image version
[root@slcm05celadm01 config]# pwd
/opt/oracle/cell/cellofl-22.1.90.0.0_LINUX.X64_220331/cellsrv/deploy/config
[root@slcm05celadm01 config]# cat cellinit.ora
#CELL Initialization Parameters
_cell_ramcache_mode=On
ipaddress1=192.168.0.17/20


cell节点上的配置文件,cellinit.ora里面记录的是cell node的ib卡的ip地址

[root@scaqad02celadm01 config]# pwd
/opt/oracle/cell20.2.0.0.0_LINUX.X64_200810/cellsrv/deploy/config
[root@scaqad02celadm01 config]# cat cellinit.ora
#CELL Initialization Parameters
_cell_ramcache_mode=On
ipaddress2=192.168.0.18/20
ipaddress1=192.168.0.17/20
_cell_fc_persistence_state=WriteBack

#_cell_enable_buffer_hist = true
#_cell_server_event="trace[CELLSRV_Disk_layer.*] disk=highest, memory=highest"
#_cell_server_event="trace[CELL_Block_Server.*] memory=highest"
#_cell_server_event="trace[CELLSRV_IO_Layer.*] memory=highest"
#_cell_server_event="trace[cellsrv_disk_layer.*] memory=highest"

#_cell_server_event="trace[CELL_Block_Server.*] memory=medium"
#_cell_server_event="trace[CELLSRV_IO_Layer.*] memory=highest"
#_cell_server_event="trace[cellsrv_disk_layer.*] memory=highest"
#_cell_server_event="trace[cellsrv_flash_cache_layer.*] memory=highest"
[root@scaqad02celadm01 config]# ifconfig -a | grep ib -A 1
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 192.168.0.17  netmask 255.255.240.0  broadcast 192.168.15.255
        infiniband 80:00:02:08:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
        RX packets 920832  bytes 125597136 (119.7 MiB)
--
ib1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 2044
        inet 192.168.0.18  netmask 255.255.240.0  broadcast 192.168.15.255
        infiniband 80:00:02:09:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00  txqueuelen 256  (InfiniBand)
        RX packets 500222  bytes 31498120 (30.0 MiB)

然后使用kfod.bin来查看是否能找到磁盘,kfod.bin可以在没有安装gi的情况下运行


[root@slcz01db03 bin]# pwd
/u01/u01/app/12.1.0/grid/bin
[root@slcz01db03 bin]# set | grep ORACLE_HOME
ORACLE_HOME=/u01/u01/app/12.1.0/grid
[root@slcz01db03 bin]# export LD_LIBRARY_PATH=/u01/u01/app/12.1.0/grid/lib


[crsusr@slcz01db03 bin]$ ./kfod.bin
Error 1 initializing CRS infrastructure
--------------------------------------------------------------------------------
 Disk          Size Path                                     User     Group
================================================================================
   1:     409600 MB o/192.168.64.182/dwgz_CD_00_slcc04cel07
   2:     409600 MB o/192.168.64.182/dwgz_CD_01_slcc04cel07
   3:     409600 MB o/192.168.64.182/dwgz_CD_02_slcc04cel07
   4:     409600 MB o/192.168.64.182/dwgz_CD_03_slcc04cel07
   5:     409600 MB o/192.168.64.182/dwgz_CD_04_slcc04cel07
   6:     409600 MB o/192.168.64.182/dwgz_CD_05_slcc04cel07
   7:     409600 MB o/192.168.64.182/dwgz_CD_06_slcc04cel07
   8:     409600 MB o/192.168.64.182/dwgz_CD_07_slcc04cel07
   9:     409600 MB o/192.168.64.182/dwgz_CD_08_slcc04cel07
  10:     409600 MB o/192.168.64.182/dwgz_CD_09_slcc04cel07
  11:     409600 MB o/192.168.64.182/dwgz_CD_10_slcc04cel07
  12:     409600 MB o/192.168.64.182/dwgz_CD_11_slcc04cel07
  13:     409600 MB o/192.168.64.183/dwgz_CD_00_slcc04cel08
  14:     409600 MB o/192.168.64.183/dwgz_CD_01_slcc04cel08
  15:     409600 MB o/192.168.64.183/dwgz_CD_02_slcc04cel08
  16:     409600 MB o/192.168.64.183/dwgz_CD_03_slcc04cel08
  17:     409600 MB o/192.168.64.183/dwgz_CD_04_slcc04cel08
  18:     409600 MB o/192.168.64.183/dwgz_CD_05_slcc04cel08
  19:     409600 MB o/192.168.64.183/dwgz_CD_06_slcc04cel08
  20:     409600 MB o/192.168.64.183/dwgz_CD_07_slcc04cel08
  21:     409600 MB o/192.168.64.183/dwgz_CD_08_slcc04cel08
  22:     409600 MB o/192.168.64.183/dwgz_CD_09_slcc04cel08
  23:     409600 MB o/192.168.64.183/dwgz_CD_10_slcc04cel08
  24:     409600 MB o/192.168.64.183/dwgz_CD_11_slcc04cel08
KFOD-00301: Unable to contact Cluster Synchronization Services (CSS). Return code 2 from kgxgncin.
上面这行不是错误,是因为没有安装gi
[root@scaqai06adm08 tyl]# rpm -ivh exadata-dbmmgmt-20.2.0.0.0.200505-1.noarch.rpm
Preparing...                          ################################# [100%]
2020-07-19 04:17:25 -0700: Pre Installation steps in progress ...
2020-07-19 04:17:27 -0700: This is a fresh install.
Updating / installing...
   1:exadata-dbmmgmt-20.2.0.0.0.200505################################# [100%]
2020-07-19 04:17:35 -0700: Post Installation steps in progress ...
Starting MS...
Importing snmp suscriber from compmon service...
Successfully imported snmp subscribers.
Installation SUCCESSFUL.
Done. Please Login as user dbmadmin.

在清环境的时候,由于不知道exadata-dbmmgmt-20.2.0.0.0.200505-1.noarch.rpm这个rpm是干什么用的,原来以为只是exascale用的,就用 rpm -e 将它删除了,
导致出现下面的错误

./kfod.bin
Error 1 initializing CRS infrastructure
KFOD-00302: Error encountered in device access layer: OSS Operation oss_initialize failed with error 149 [Unable to load lib ISAL]

最近刚碰到的另外一个导致compute node无法发现griddisk的原因,应该是exascale残留的配置文件导致的,删除exascale配置文件就解决了


[root@scaqai06adm08 bin]# ./kfod.bin disks=all
--------------------------------------------------------------------------------
ORACLE_SID ORACLE_HOME
================================================================================


需要删除/etc/oracle/cell/network-config/下的和exascale相关的配置文件

[root@scaqai06adm08 network-config]# ls
cellinit.ora  cellip.ora  cellroute.ora  egsip.ora  egsip.ora-orig  eswallet
[root@scaqai06adm08 network-config]# rm -rf egs*
[root@scaqai06adm08 network-config]# rm -rf eswallet/
[root@scaqai06adm08 network-config]# ls
cellinit.ora  cellip.ora  cellroute.ora

不知道为什么cellroute.ora 文件总是被清空?

[root@scaqai06adm08 network-config]# cat cellroute.ora
# Routes for 192.168.41.245;192.168.41.246
route="192.168.41.245;192.168.41.221"
route="192.168.41.246;192.168.41.222"


# Routes for 192.168.41.247;192.168.41.248
route="192.168.41.247;192.168.41.221"
route="192.168.41.248;192.168.41.222"


# Routes for 192.168.41.249;192.168.41.250
route="192.168.41.249;192.168.41.221"
route="192.168.41.250;192.168.41.222"
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值