1. Shut down the primary.
shard1:PRIMARY> use admin
switched to db admin
shard1:PRIMARY> db.shutdownServer()
server should be down...
2024-06-15T17:45:12.844+0800 I NETWORK [js] trying reconnect to 127.0.0.1:27025 failed
2024-06-15T17:45:13.389+0800 I NETWORK [js] reconnect 127.0.0.1:27025 failed failed
2024-06-15T17:45:13.391+0800 I NETWORK [js] trying reconnect to 127.0.0.1:27025 failed
2024-06-15T17:45:13.391+0800 I NETWORK [js] reconnect 127.0.0.1:27025 failed failed
>
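Note that db.shutdownServer() has to be run against the admin database, and the reconnect errors above are expected: the shell has simply lost its connection to the instance it just stopped. The command also takes options; the following is only a sketch of a more controlled shutdown, with an illustrative 60-second timeout:

// Waits up to 60 seconds (illustrative value) for a secondary to catch up
// before shutting down; with force: false (the default) the shutdown is
// aborted if no secondary can catch up in time.
db.shutdownServer({ timeoutSecs: 60, force: false })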
2. Check the cluster status
shard1:PRIMARY> rs.status()
{
"set" : "shard1",
"date" : ISODate("2024-06-15T09:45:44.129Z"),
"myState" : 1,
"term" : NumberLong(3),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1718444703, 1),
"t" : NumberLong(2)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1718444703, 1),
"t" : NumberLong(2)
},
"appliedOpTime" : {
"ts" : Timestamp(1718444734, 1),
"t" : NumberLong(3)
},
"durableOpTime" : {
"ts" : Timestamp(1718444734, 1),
"t" : NumberLong(3)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1718444693, 1),
"members" : [
{
"_id" : 0,
"name" : "192.168.1.51:27023",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 43025,
"optime" : {
"ts" : Timestamp(1718444734, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2024-06-15T09:45:34Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1718444712, 1),
"electionDate" : ISODate("2024-06-15T09:45:12Z"),
"configVersion" : 7,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "192.168.1.51:27024",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 42606,
"lastHeartbeat" : ISODate("2024-06-15T09:45:42.856Z"),
"lastHeartbeatRecv" : ISODate("2024-06-15T09:45:42.410Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 7
},
{
"_id" : 2,
"name" : "192.168.1.51:27025",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2024-06-15T09:45:42.871Z"),
"lastHeartbeatRecv" : ISODate("2024-06-15T09:45:12.371Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to 192.168.1.51:27025 :: caused by :: Connection refused",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
}
],
"ok" : 1,
"operationTime" : Timestamp(1718444734, 1),
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000003")
},
"lastCommittedOpTime" : Timestamp(1718444703, 1),
"$configServerState" : {
"opTime" : {
"ts" : Timestamp(1718444739, 1),
"t" : NumberLong(1)
}
},
"$clusterTime" : {
"clusterTime" : Timestamp(1718444739, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
The replica set now consists of one primary and one arbiter, and the original primary, 27025, can no longer be reached. This confirms that automatic failover has indeed completed.
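If you only need the member states rather than the full document, a short loop over rs.status().members gives the same picture at a glance; this is a convenience sketch, not part of the original transcript:

// Print address, health flag and state string for every replica set member.
rs.status().members.forEach(function (m) {
    print(m.name + "  health=" + m.health + "  state=" + m.stateStr);
});
// At this point it should report 27023 as PRIMARY, 27024 as ARBITER,
// and 27025 as (not reachable/healthy).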
3. Insert data and restart the downed node
shard1:PRIMARY> db.test1.insert({"id":1,"name":"薛双奇"})
WriteResult({ "nInserted" : 1 })
shard1:PRIMARY> db.test1.insert({"id":2,"name":"薛双奇2"})
WriteResult({ "nInserted" : 1 })
shard1:PRIMARY> db.test1.insert({"id":3,"name":"薛双奇3"})
WriteResult({ "nInserted" : 1 })
--Data inserted on the new primary.
shard1:PRIMARY> db.test1.find()
{ "_id" : ObjectId("666d63abfc41b781c917bc01"), "id" : 1, "name" : "薛双奇" }
{ "_id" : ObjectId("666d63b2fc41b781c917bc02"), "id" : 2, "name" : "薛双奇2" }
{ "_id" : ObjectId("666d63b8fc41b781c917bc03"), "id" : 3, "name" : "薛双奇3" }
shard1:PRIMARY>
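The transcript does not show the command used to bring 27025 back up. Assuming the node was originally started from a configuration file (the path below is only a placeholder for whatever was used in the deployment chapter), the restart on 192.168.1.51 would look roughly like this:

# Restart the stopped shard member; replace the path with the actual
# config file used when the 27025 instance was first started.
mongod -f /path/to/shard1_27025.conf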
--Check the cluster status again.
shard1:PRIMARY> rs.status()
{
"set" : "shard1",
"date" : ISODate("2024-06-15T09:51:51.449Z"),
"myState" : 1,
"term" : NumberLong(4),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
},
"appliedOpTime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
},
"durableOpTime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1718444703, 1),
"members" : [
{
"_id" : 0,
"name" : "192.168.1.51:27023",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 57,
"optime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
},
"optimeDurable" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
},
"optimeDate" : ISODate("2024-06-15T09:51:46Z"),
"optimeDurableDate" : ISODate("2024-06-15T09:51:46Z"),
"lastHeartbeat" : ISODate("2024-06-15T09:51:51.021Z"),
"lastHeartbeatRecv" : ISODate("2024-06-15T09:51:51.157Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "192.168.1.51:27025",
"syncSourceHost" : "192.168.1.51:27025",
"syncSourceId" : 2,
"infoMessage" : "",
"configVersion" : 7
},
{
"_id" : 1,
"name" : "192.168.1.51:27024",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 57,
"lastHeartbeat" : ISODate("2024-06-15T09:51:51.021Z"),
"lastHeartbeatRecv" : ISODate("2024-06-15T09:51:50.625Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 7
},
{
"_id" : 2,
"name" : "192.168.1.51:27025",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 60,
"optime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(4)
},
"optimeDate" : ISODate("2024-06-15T09:51:46Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1718445064, 2),
"electionDate" : ISODate("2024-06-15T09:51:04Z"),
"configVersion" : 7,
"self" : true,
"lastHeartbeatMessage" : ""
}
],
"ok" : 1,
"operationTime" : Timestamp(1718445106, 1),
"$gleStats" : {
"lastOpTime" : Timestamp(0, 0),
"electionId" : ObjectId("7fffffff0000000000000004")
},
"lastCommittedOpTime" : Timestamp(1718445106, 1),
"$configServerState" : {
"opTime" : {
"ts" : Timestamp(1718445106, 1),
"t" : NumberLong(1)
}
},
"$clusterTime" : {
"clusterTime" : Timestamp(1718445106, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
--We can see that the original 27025 has become the primary again.
shard1:PRIMARY> use test;
switched to db test
shard1:PRIMARY> show tables;
test1
testindx
users
shard1:PRIMARY> db.test1.find()
{ "_id" : ObjectId("666d63b8fc41b781c917bc03"), "id" : 3, "name" : "薛双奇3" }
{ "_id" : ObjectId("666d63b2fc41b781c917bc02"), "id" : 2, "name" : "薛双奇2" }
{ "_id" : ObjectId("666d63abfc41b781c917bc01"), "id" : 1, "name" : "薛双奇" }
shard1:PRIMARY>
4. Summary
--After 27025 went down, 27023 took over as the primary, and we inserted documents into the test1 collection on 27023.
--After restarting 27025, it not only caught up on the data it had missed, it also automatically switched back to being the primary.
--The main reason for this is the member priority set when the replica set was initialized.
--Looking back at the initialization chapter, 27023 was configured with priority 1, lower than 27025's, which is why 27025 reclaimed the primary role once it rejoined and caught up.
--In other words, the member with the highest priority preempts: as soon as it is healthy and up to date again, it triggers an election and automatically becomes the primary. The sketch below shows how to inspect these priorities.
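To check (or change) the priorities the summary refers to, you can read the current configuration in the mongo shell. The priority value assigned to 27025 in this sketch is only an illustration; the actual numbers come from the initialization chapter:

// Show each member's host and currently configured priority.
rs.conf().members.forEach(function (m) {
    print(m.host + "  priority=" + m.priority);
});

// Illustrative reconfiguration: give 27025 (members[2]) the highest priority
// so that it preempts the primary role whenever it is healthy.
var cfg = rs.conf();
cfg.members[2].priority = 2;   // assumed value, adjust to your deployment
rs.reconfig(cfg);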