索引恢复
allocation阶段最后会向集群发布一个新的集群状态,状态发布后会进入IndicesClusterStateService的applyClusterState应用集群状态
@Override
public synchronized void applyClusterState(final ClusterChangedEvent event) {
if (!lifecycle.started()) {
return;
}
final ClusterState state = event.state();
//我们需要清理这个节点上的分片和索引,因为我们
// 一旦状态持久性被禁用(无主/未恢复),将再次恢复它们
if (state.blocks().disableStatePersistence()) {
for (AllocatedIndex<? extends Shard> indexService : indicesService) {
// also cleans shards
indicesService.removeIndex(indexService.index(), NO_LONGER_ASSIGNED, "cleaning index (disabled block persistence)");
}
return;
}
//从失败的分片缓存中删除不再由主节点分配给该节点的分片条目
updateFailedShardsCache(state);
//还删除已删除索引的分片
deleteIndices(event);
//删除索引分片不再分配给当前节点的索引分片
removeIndices(event);
//报告主节点分给当前节点的索引分片,但是不在当前节点
failMissingShards(state);
//删除任何与 master 期望不匹配的本地分片
removeShards(state);
//也可以使分片失败,但这些都保证在 failedShardsCache
updateIndices(event);
//创建索引
createIndices(state);
//创建或更新分片
createOrUpdateShards(state);
}
删除不再由主节点分配给本节点分片条目
- master发生变更路由表为空则删除所有数据
- 移除不在本节点分配的信息
- 将分配失败的shard发送给master由master进行处理
//从失败的分片缓存中删除不再由主节点分配给该节点的分片条目
updateFailedShardsCache(state);
private void updateFailedShardsCache(final ClusterState state) {
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
//master发生变更,路由表是空的,则删除所有数据
if (localRoutingNode == null) {
failedShardsCache.clear();
return;
}
DiscoveryNode masterNode = state.nodes().getMasterNode();
// remove items from cache which are not in our routing table anymore and resend failures that have not executed on master yet
//移除不在本节点分配的信息
for (Iterator<Map.Entry<ShardId, ShardRouting>> iterator = failedShardsCache.entrySet().iterator(); iterator.hasNext(); ) {
ShardRouting failedShardRouting = iterator.next().getValue();
ShardRouting matchedRouting = localRoutingNode.getByShardId(failedShardRouting.shardId());
if (matchedRouting == null || matchedRouting.isSameAllocation(failedShardRouting) == false) {
iterator.remove();
} else {
if (masterNode != null) {
// TODO: can we remove this? Is resending shard failures the responsibility of shardStateAction?
String message = "master " + masterNode + " has not removed previously failed shard. resending shard failure";
logger.trace("[{}] re-sending failed shard [{}], reason [{}]", matchedRouting.shardId(), matchedRouting, message);
//将失败的shard发送给master,由master进行处理
shardStateAction.localShardFailed(matchedRouting, message, null, SHARD_STATE_ACTION_LISTENER, state);
}
}
}
}
执行删除索引处理
//删除已删除索引
deleteIndices(event);
private void deleteIndices(final ClusterChangedEvent event) {
final ClusterState previousState = event.previousState();
final ClusterState state = event.state();
final String localNodeId = state.nodes().getLocalNodeId();
assert localNodeId != null;
//遍历待删除索引
for (Index index : event.indicesDeleted()) {
if (logger.isDebugEnabled()) {
logger.debug("[{}] cleaning index, no longer part of the metadata", index);
}
//每个索引单独的服务
AllocatedIndex<? extends Shard> indexService = indicesService.indexService(index);
final IndexSettings indexSettings;
if (indexService != null) {
indexSettings = indexService.getIndexSettings();
//删除索引
indicesService.removeIndex(index, DELETED, "index no longer part of the metadata");
} else if (previousState.metadata().hasIndex(index.getName())) {
// The deleted index was part of the previous cluster state, but not loaded on the local node
//previousState包含待删除的索引需要删除
final IndexMetadata metadata = previousState.metadata().index(index);
indexSettings = new IndexSettings(metadata, settings);
indicesService.deleteUnassignedIndex("deleted index was not assigned to local node", metadata, state);
} else {
// The previous cluster state's metadata also does not contain the index,
// which is what happens on node startup when an index was deleted while the
// node was not part of the cluster. In this case, try reading the index
// metadata from disk. If its not there, there is nothing to delete.
// First, though, verify the precondition for applying this case by
// asserting that the previous cluster state is not initialized/recovered.
assert previousState.blocks().hasGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK);
//验证索引是否删除
final IndexMetadata metadata = indicesService.verifyIndexIsDeleted(index, event.state());
if (metadata != null) {
indexSettings = new IndexSettings(metadata, settings);
} else {
indexSettings = null;
}
}
if (indexSettings != null) {
threadPool.generic().execute(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] failed to complete pending deletion for index", index), e);
}
@Override
protected void doRun() throws Exception {
try {
// we are waiting until we can lock the index / all shards on the node and then we ack the delete of the store
// to the master. If we can't acquire the locks here immediately there might be a shard of this index still
// holding on to the lock due to a "currently canceled recovery" or so. The shard will delete itself BEFORE the
// lock is released so it's guaranteed to be deleted by the time we get the lock
//执行索引删除
indicesService.processPendingDeletes(index, indexSettings, new TimeValue(30, TimeUnit.MINUTES));
} catch (ShardLockObtainFailedException exc) {
logger.warn("[{}] failed to lock all shards for index - timed out after 30 seconds", index);
} catch (InterruptedException e) {
logger.warn("[{}] failed to lock all shards for index - interrupted", index);
}
}
});
}
}
}
针对close操作进行remove操作释放资源,并不会删除数据
//主要处理索引关闭释放资源
removeIndices(event);
通知主节点应该在此节点上处于活动状态的分片但是不存在的分片
failMissingShards(state);
删除没有没有分配到当前节点的shard和过时的shard
removeShards(state);
private void removeShards(final ClusterState state) {
final String localNodeId = state.nodes().getLocalNodeId();
assert localNodeId != null;
// remove shards based on routing nodes (no deletion of data)
//节点和分片映射关系
RoutingNode localRoutingNode = state.getRoutingNodes().node(localNodeId);
for (AllocatedIndex<? extends Shard> indexService : indicesService) {
for (Shard shard : indexService) {
ShardRouting currentRoutingEntry = shard.routingEntry();
ShardId shardId = currentRoutingEntry.shardId();
ShardRouting newShardRouting = localRoutingNode == null ? null : localRoutingNode.getByShardId(shardId);
if (newShardRouting == null) {
// we can just remove the shard without cleaning it locally, since we will clean it in IndicesStore
// once all shards are allocated
//删除没有分配给当前节点的shard
logger.debug("{} removing shard (not allocated)", shardId);
indexService.removeShard(shardId.id(), "removing shard (not allocated)");
} else if (newShardRouting.isSameAllocation(currentRoutingEntry) == false) {
logger.debug("{} removing shard (stale allocation id, stale {}, new {})", shardId,
currentRoutingEntry, newShardRouting);
//删除过时的shard
indexService.removeShard(shardId.id(), "removing shard (stale copy)");
} else if (newShardRouting.initializing() && currentRoutingEntry.active()) {
// this can happen if the node was isolated/gc-ed, rejoins the cluster and a new shard with the same allocation id
// is assigned to it. Batch cluster state processing or if shard fetching completes before the node gets a new cluster
// state may result in a new shard being initialized while having the same allocation id as the currently started shard.
logger.debug("{} removing shard (not active, current {}, new {})", shardId, currentRoutingEntry, newShardRouting);
//当节点离线后重新加入集群,可能导致分片的allocation id相同,则需要移除旧的shard
indexService.removeShard(shardId.id(), "removing shard (stale copy)");
} else if (newShardRouting.primary() && currentRoutingEntry.primary() == false && newShardRouting.initializing()) {
assert currentRoutingEntry.initializing() : currentRoutingEntry; // see above if clause
// this can happen when cluster state batching batches activation of the shard, closing an index, reopening it
// and assigning an initializing primary to this node
//批量更新时,关闭索引,重新打开
logger.debug("{} removing shard (not active, current {}, new {})", shardId, currentRoutingEntry, newShardRouting);
indexService.removeShard(shardId.id(), "removing shard (stale copy)");
}
}
}
}
更新索引mappings和settings
//更新索引 settings、mapping 等信息
updateIndices(event);
创建索引服务
//创建索引
createIndices(state
private void createIndices(final ClusterState state) {
// we only create indices for shards that are allocated
//我们只为分配的分片创建索引
//为分配给我们的shard创建索引
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
if (localRoutingNode == null) {
return;
}
// create map of indices to create with shards to fail if index creation fails
//如果索引创建失败,则创建索引映射以使用分片创建失败
final Map<Index, List<ShardRouting>> indicesToCreate = new HashMap<>();
for (ShardRouting shardRouting : localRoutingNode) {
if (failedShardsCache.containsKey(shardRouting.shardId()) == false) {
final Index index = shardRouting.index();
if (indicesService.indexService(index) == null) {
indicesToCreate.computeIfAbsent(index, k -> new ArrayList<>()).add(shardRouting);
}
}
}
for (Map.Entry<Index, List<ShardRouting>> entry : indicesToCreate.entrySet()) {
final Index index = entry.getKey();
final IndexMetadata indexMetadata = state.metadata().index(index);
logger.debug("[{}] creating index", index);
AllocatedIndex<? extends Shard> indexService = null;
try {
//创建索引
indexService = indicesService.createIndex(indexMetadata, buildInIndexListener, true);
if (indexService.updateMapping(null, indexMetadata) && sendRefreshMapping) {
nodeMappingRefreshAction.nodeMappingRefresh(state.nodes().getMasterNode(),
new NodeMappingRefreshAction.NodeMappingRefreshRequest(indexMetadata.getIndex().getName(),
indexMetadata.getIndexUUID(), state.nodes().getLocalNodeId())
);
}
} catch (Exception e) {
final String failShardReason;
if (indexService == null) {
failShardReason = "failed to create index";
} else {
failShardReason = "failed to update mapping for index";
indicesService.removeIndex(index, FAILURE, "removing index (mapping update failed)");
}
for (ShardRouting shardRouting : entry.getValue()) {
sendFailShard(shardRouting, failShardReason, e, state);
}
}
}
}
创建索引方法起始就是创建indexService服务放入缓存,并没有执行真正的创建索引操作
@Override
public synchronized IndexService createIndex(
final IndexMetadata indexMetadata, final List<IndexEventListener> builtInListeners,
final boolean writeDanglingIndices) throws IOException {
ensureChangesAllowed();
//检查请求是否有效
if (indexMetadata.getIndexUUID().equals(IndexMetadata.INDEX_UUID_NA_VALUE)) {
throw new IllegalArgumentException("index must have a real UUID found value: [" + indexMetadata.getIndexUUID() + "]");
}
final Index index = indexMetadata.getIndex();
//检查索引是否已经存在
if (hasIndex(index)) {
throw new ResourceAlreadyExistsException(index);
}
List<IndexEventListener> finalListeners = new ArrayList<>(builtInListeners);
final IndexEventListener onStoreClose = new IndexEventListener() {
@Override
public void onStoreCreated(ShardId shardId) {
indicesRefCount.incRef();
}
@Override
public void onStoreClosed(ShardId shardId) {
try {
indicesQueryCache.onClose(shardId);
} finally {
indicesRefCount.decRef();
}
}
};
finalListeners.add(onStoreClose);
finalListeners.add(oldShardsStats);
//创建索引服务
final IndexService indexService =
createIndexService(
CREATE_INDEX,
indexMetadata,
indicesQueryCache,
indicesFieldDataCache,
finalListeners,
indexingMemoryController);
boolean success = false;
try {
if (writeDanglingIndices && nodeWriteDanglingIndicesInfo) {
indexService.addMetadataListener(imd -> updateDanglingIndicesInfo(index));
}
indexService.getIndexEventListener().afterIndexCreated(indexService);
//存储索引服务
indices = newMapBuilder(indices).put(index.getUUID(), indexService).immutableMap();
if (writeDanglingIndices) {
if (nodeWriteDanglingIndicesInfo) {
updateDanglingIndicesInfo(index);
} else {
indexService.deleteDanglingIndicesInfo();
}
}
success = true;
return indexService;
} finally {
if (success == false) {
indexService.close("plugins_failed", true);
}
}
}
执行真正的索引创建或更新索引
//创建或更新分片
createOrUpdateShards(state);
private void createOrUpdateShards(final ClusterState state) {
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
if (localRoutingNode == null) {
return;
}
DiscoveryNodes nodes = state.nodes();
//索引分片访问路由表
RoutingTable routingTable = state.routingTable();
for (final ShardRouting shardRouting : localRoutingNode) {
ShardId shardId = shardRouting.shardId();
if (failedShardsCache.containsKey(shardId) == false) {
AllocatedIndex<? extends Shard> indexService = indicesService.indexService(shardId.getIndex());
assert indexService != null : "index " + shardId.getIndex() + " should have been created by createIndices";
Shard shard = indexService.getShardOrNull(shardId.id());
if (shard == null) {
assert shardRouting.initializing() : shardRouting + " should have been removed by failMissingShards";
//创建分片
createShard(nodes, routingTable, shardRouting, state);
} else {
//更新分片
updateShard(nodes, shardRouting, shard, routingTable, state);
}
}
}
}
如果分配的shard在本节点之前没有则需要创建
private void createShard(DiscoveryNodes nodes, RoutingTable routingTable, ShardRouting shardRouting, ClusterState state) {
assert shardRouting.initializing() : "only allow shard creation for initializing shard but was " + shardRouting;
DiscoveryNode sourceNode = null;
//需要从远程恢复
if (shardRouting.recoverySource().getType() == Type.PEER) {
//查到主分片所在的节点
sourceNode = findSourceNodeForPeerRecovery(logger, routingTable, nodes, shardRouting);
if (sourceNode == null) {
logger.trace("ignoring initializing shard {} - no source node can be found.", shardRouting.shardId());
return;
}
}
try {
final long primaryTerm = state.metadata().index(shardRouting.index()).primaryTerm(shardRouting.id());
logger.debug("{} creating shard with primary term [{}]", shardRouting.shardId(), primaryTerm);
//跟踪与分片恢复相关的状态。
RecoveryState recoveryState = new RecoveryState(s