文章目录
前言
在源码篇–Nacos服务–中章(5) 对nacos 客户端的创建及grpc 连接的建立,心跳检查,实例注册进行了介绍;本文对grpc通道建立&客户端服务信息获取(故障转移)细节进行介绍。
一、grpc通道建立:
1.1 客户端创建:
/**
 * Returns the RPC client registered under {@code clientName}, creating and caching it on first use.
 *
 * <p>Only the gRPC connection type is supported. A {@code null} connection type is rejected with the
 * same {@link UnsupportedOperationException} as any other non-gRPC type instead of failing with a
 * {@link NullPointerException} while the error message is being built.
 *
 * @param clientName         key of the client in {@code CLIENT_MAP}
 * @param connectionType     requested connection type; must be {@code ConnectionType.GRPC}
 * @param threadPoolCoreSize core size passed through to the new client
 * @param threadPoolMaxSize  max size passed through to the new client
 * @param labels             labels passed through to the new client
 * @param tlsConfig          TLS configuration passed through to the new client
 * @return the cached client for {@code clientName}, or a newly created {@code GrpcSdkClient}
 * @throws UnsupportedOperationException if {@code connectionType} is not gRPC
 */
public static RpcClient createClient(String clientName, ConnectionType connectionType, Integer threadPoolCoreSize,
        Integer threadPoolMaxSize, Map<String, String> labels, RpcClientTlsConfig tlsConfig) {
    if (!ConnectionType.GRPC.equals(connectionType)) {
        // The guard above accepts null, so the message must not dereference connectionType blindly.
        String typeName = connectionType == null ? null : connectionType.getType();
        throw new UnsupportedOperationException("unsupported connection type :" + typeName);
    }
    // computeIfAbsent only runs the factory when no client is registered under this name yet.
    return CLIENT_MAP.computeIfAbsent(clientName, name -> {
        LOGGER.info("[RpcClientFactory] create a new rpc client of " + clientName);
        return new GrpcSdkClient(name, threadPoolCoreSize, threadPoolMaxSize, labels, tlsConfig);
    });
}
类图:
1.2 随机选择一个服务端准备建立连接:
// Start-up connect loop: keep trying while retries remain and no connection has been obtained yet.
while (startUpRetryTimes >= 0 && connectToServer == null) {
try {
// Consume one retry before the attempt so that a failing attempt still counts.
startUpRetryTimes--;
// Ask the server list for the next server address.
ServerInfo serverInfo = nextRpcServer();
LoggerUtils.printIfInfoEnabled(LOGGER, "[{}] Try to connect to server on start up, server: {}",
rpcClientConfig.name(), serverInfo);
// Try to establish a connection to that server; the result is re-checked by the loop condition.
connectToServer = connectToServer(serverInfo);
} catch (Throwable e) {
// A failed attempt is only logged here; the loop condition decides whether another attempt is made.
LoggerUtils.printIfWarnEnabled(LOGGER,
"[{}] Fail to connect to server on start up, error message = {}, start up retry times left: {}",
rpcClientConfig.name(), e.getMessage(), startUpRetryTimes, e);
}
}
/**
 * Picks the next server to connect to.
 *
 * <p>The address string comes from the server list factory and is wrapped into a
 * {@code ServerInfo} by {@code resolveServerInfo}.
 *
 * @return the resolved server info for the next address
 */
protected ServerInfo nextRpcServer() {
    return resolveServerInfo(getServerListFactory().genNextServer());
}
// ServerListManager
/**
 * Returns the next server address, cycling through the server list by an incrementing index.
 *
 * @return the address stored at the next index of the server list
 */
@Override
public String genNextServer() {
    // floorMod keeps the index non-negative after the counter wraps past Integer.MAX_VALUE;
    // the plain % operator would yield a negative index there and make get() throw.
    int index = Math.floorMod(currentIndex.incrementAndGet(), getServerList().size());
    return getServerList().get(index);
}
1.3 连接建立:
流程图:
1.3.1 客户端连接建立:
/**
 * Tries to open a gRPC connection to the given server.
 *
 * <p>Steps, in order: create the executor if needed, build the channel and the future stub, run a
 * server check, create the bi-directional stream stub, wrap everything in a {@code GrpcConnection},
 * send a {@code ConnectionSetupRequest}, and then either wait for the ability negotiation result or
 * sleep briefly for servers that do not support it.
 *
 * @param serverInfo server to connect to
 * @return the established connection, or {@code null} if the server check did not return a
 *         {@code ServerCheckResponse}, the ability check failed, or an exception was caught
 */
@Override
public Connection connectToServer(ServerInfo serverInfo) {
// the newest connection id
String connectionId = "";
try {
// Create the executor on first use only.
if (grpcExecutor == null) {
this.grpcExecutor = createGrpcExecutor(serverInfo.getServerIp());
}
// The gRPC port is the server port plus a fixed offset.
int port = serverInfo.getServerPort() + rpcPortOffset();
// Create the channel.
ManagedChannel managedChannel = createNewManagedChannel(serverInfo.getServerIp(), port);
// Create the future stub on that channel.
RequestGrpc.RequestFutureStub newChannelStubTemp = createNewChannelStub(managedChannel);
// Send a server check request over the new stub.
Response response = serverCheck(serverInfo.getServerIp(), port, newChannelStubTemp);
// Anything other than a ServerCheckResponse (including null) means the check failed: drop the channel.
if (!(response instanceof ServerCheckResponse)) {
shuntDownChannel(managedChannel);
return null;
}
// submit ability table as soon as possible
// ability table will be null if server doesn't support ability table
// Check succeeded: take the connection id from the response.
ServerCheckResponse serverCheckResponse = (ServerCheckResponse) response;
connectionId = serverCheckResponse.getConnectionId();
// Create the bi-directional streaming stub on the same channel as the future stub.
BiRequestStreamGrpc.BiRequestStreamStub biRequestStreamStub = BiRequestStreamGrpc
.newStub(newChannelStubTemp.getChannel());
// Create the connection object from the server info and the executor.
GrpcConnection grpcConn = new GrpcConnection(serverInfo, grpcExecutor);
grpcConn.setConnectionId(connectionId);
// if not supported, it will be false
if (serverCheckResponse.isSupportAbilityNegotiation()) {
// mark
this.recAbilityContext.reset(grpcConn);
// promise null if no abilities receive
grpcConn.setAbilityTable(null);
}
//create stream request and bind connection event to this connection.
// Bind the request stream and keep the observer used to write to the server.
StreamObserver<Payload> payloadStreamObserver = bindRequestStream(biRequestStreamStub, grpcConn);
// stream observer to send response to server
grpcConn.setPayloadStreamObserver(payloadStreamObserver);
grpcConn.setGrpcFutureServiceStub(newChannelStubTemp);
grpcConn.setChannel(managedChannel);
//send a setup request.
ConnectionSetupRequest conSetupRequest = new ConnectionSetupRequest();
conSetupRequest.setClientVersion(VersionUtils.getFullClientVersion());
conSetupRequest.setLabels(super.getLabels());
// set ability table
conSetupRequest
.setAbilityTable(NacosAbilityManagerHolder.getInstance().getCurrentNodeAbilities(abilityMode()));
conSetupRequest.setTenant(super.getTenant());
// Send the connection setup request to the server.
grpcConn.sendRequest(conSetupRequest);
// wait for response
if (recAbilityContext.isNeedToSync()) {
// try to wait for notify response
recAbilityContext.await(this.clientConfig.capabilityNegotiationTimeout(), TimeUnit.MILLISECONDS);
// if no server abilities receiving, then reconnect
// NOTE(review): the channel is not shut down on this path here; confirm check() cleans up the connection.
if (!recAbilityContext.check(grpcConn)) {
return null;
}
} else {
// leave for adapting old version server
// registration is considered successful by default after 100ms
// wait to register connection setup
Thread.sleep(100L);
}
return grpcConn;
} catch (Exception e) {
// Any failure (including an interrupted sleep) is logged and reported as "no connection".
LOGGER.error("[{}]Fail to connect to server!,error={}", GrpcClient.this.getName(), e);
// remove and notify
recAbilityContext.release(null);
}
return null;
}
通道建立:
/**
 * Builds a new gRPC channel for the given server address.
 *
 * <p>The channel is configured with the client's executor, the default compressor and decompressor
 * registries, the configured inbound message size limit and the keep-alive settings. No server
 * check is performed here.
 *
 * @param serverIp   server IP to connect to
 * @param serverPort server port to connect to
 * @return the channel produced by the configured builder
 */
private ManagedChannel createNewManagedChannel(String serverIp, int serverPort) {
    LOGGER.info("grpc client connection server:{} ip,serverPort:{},grpcTslConfig:{}", serverIp, serverPort,
            JacksonUtils.toJson(clientConfig.tlsConfig()));
    // Target the server's ip and port, apply the client settings, then build the channel in one chain.
    return buildChannel(serverIp, serverPort, buildSslContext())
            .executor(grpcExecutor)
            .compressorRegistry(CompressorRegistry.getDefaultInstance())
            .decompressorRegistry(DecompressorRegistry.getDefaultInstance())
            .maxInboundMessageSize(clientConfig.maxInboundMessageSize())
            .keepAliveTime(clientConfig.channelKeepAlive(), TimeUnit.MILLISECONDS)
            .keepAliveTimeout(clientConfig.channelKeepAliveTimeout(), TimeUnit.MILLISECONDS)
            .build();
}
1.3.2 nacos 服务端连接建立:
createNewManagedChannel 方法,客户端通过grpc 与服务端建立了连接;
1.3.2.1 通道建立完成时:
AddressTransportFilter 传输过滤器,一个客户端与服务端建立连接完成进入transportReady 方法,创建当前客户端的连接id 及保存客户端的信息,封装为Attributes 属性对象;
/**
 * Called when a transport is ready; returns the transport attributes enriched with connection data.
 *
 * <p>The added attributes are a connection id of the form
 * {@code <currentTimeMillis>_<remoteIp>_<remotePort>}, the remote ip, the remote port and the
 * local port.
 *
 * @param transportAttrs attributes of the transport that became ready
 * @return a copy of {@code transportAttrs} with the connection attributes added
 */
@Override
public Attributes transportReady(Attributes transportAttrs) {
    // Remote (client) and local (server) socket addresses of this transport.
    InetSocketAddress clientAddr = (InetSocketAddress) transportAttrs.get(Grpc.TRANSPORT_ATTR_REMOTE_ADDR);
    InetSocketAddress serverAddr = (InetSocketAddress) transportAttrs.get(Grpc.TRANSPORT_ATTR_LOCAL_ADDR);
    int clientPort = clientAddr.getPort();
    int serverPort = serverAddr.getPort();
    String clientIp = clientAddr.getAddress().getHostAddress();

    // Connection id: current time in millis + client ip + client port.
    String connectionId = System.currentTimeMillis() + "_" + clientIp + "_" + clientPort;

    Attributes enriched = transportAttrs.toBuilder()
            .set(ATTR_TRANS_KEY_CONN_ID, connectionId)
            .set(ATTR_TRANS_KEY_REMOTE_IP, clientIp)
            .set(ATTR_TRANS_KEY_REMOTE_PORT, clientPort)
            .set(ATTR_TRANS_KEY_LOCAL_PORT, serverPort)
            .build();

    Loggers.REMOTE_DIGEST.info("Connection transportReady,connectionId = {} ", connectionId);
    return enriched;
}
1.3.2.2 请求拦截器 GrpcConnectionInterceptor:
每次请求调用时:请求拦截器 GrpcConnectionInterceptor 在进入具体的方法之前,填充上下文,其中就包括建立的连接id(connectId)
/**
 * Intercepts every call and copies the connection attributes of the call into the gRPC context
 * before delegating to the next handler.
 *
 * <p>The connection id, remote ip, remote port and local port are always copied. For calls to the
 * bi-directional stream service, the internal channel of the call is added as well.
 *
 * @param call    the incoming server call, whose attributes carry the connection data
 * @param headers request metadata, passed through unchanged
 * @param next    the next handler in the chain
 * @param <T>     request message type
 * @param <S>     response message type
 * @return the listener produced by running {@code next} inside the populated context
 */
@Override
public <T, S> ServerCall.Listener<T> interceptCall(ServerCall<T, S> call, Metadata headers,
        ServerCallHandler<T, S> next) {
    // Build the context one value at a time from the call's attributes.
    Context populated = Context.current();
    populated = populated.withValue(GrpcServerConstants.CONTEXT_KEY_CONN_ID,
            call.getAttributes().get(GrpcServerConstants.ATTR_TRANS_KEY_CONN_ID));
    populated = populated.withValue(GrpcServerConstants.CONTEXT_KEY_CONN_REMOTE_IP,
            call.getAttributes().get(GrpcServerConstants.ATTR_TRANS_KEY_REMOTE_IP));
    populated = populated.withValue(GrpcServerConstants.CONTEXT_KEY_CONN_REMOTE_PORT,
            call.getAttributes().get(GrpcServerConstants.ATTR_TRANS_KEY_REMOTE_PORT));
    populated = populated.withValue(GrpcServerConstants.CONTEXT_KEY_CONN_LOCAL_PORT,
            call.getAttributes().get(GrpcServerConstants.ATTR_TRANS_KEY_LOCAL_PORT));

    String serviceName = call.getMethodDescriptor().getServiceName();
    if (GrpcServerConstants.REQUEST_BI_STREAM_SERVICE_NAME.equals(serviceName)) {
        // Only the bi-directional stream service gets the channel in its context.
        populated = populated.withValue(GrpcServerConstants.CONTEXT_KEY_CHANNEL, getInternalChannel(call));
    }

    // Run the rest of the call chain inside the populated context.
    return Contexts.interceptCall(populated, call, headers, next);
}
1.3.2.3 服务端处理serverCheck :
通过serverCheck请求获取到服务端 返回的connectId;
/**
 * Sends a server check request and returns the parsed response.
 *
 * @param ip                  server ip, used for logging only
 * @param port                server port, used for logging only
 * @param requestBlockingStub future stub used to send the check request
 * @return the parsed response, or {@code null} if sending, waiting or parsing threw an exception
 */
private Response serverCheck(String ip, int port, RequestGrpc.RequestFutureStub requestBlockingStub) {
    try {
        // Wrap a fresh ServerCheckRequest into a gRPC payload and send it.
        Payload checkPayload = GrpcUtils.convert(new ServerCheckRequest());
        ListenableFuture<Payload> pending = requestBlockingStub.request(checkPayload);
        // Wait for the reply no longer than the configured server check timeout.
        Payload reply = pending.get(clientConfig.serverCheckTimeOut(), TimeUnit.MILLISECONDS);
        //receive connection unregister response here,not check response is success.
        return (Response) GrpcUtils.parse(reply);
    } catch (Exception e) {
        LoggerUtils.printIfErrorEnabled(LOGGER,
                "Server check fail, please check server {} ,port {} is available , error ={}", ip, port, e);
        // When TLS is enabled on the client, add a hint to the log.
        boolean tlsEnabled = this.clientConfig != null && this.clientConfig.tlsConfig() != null
                && this.clientConfig.tlsConfig().getEnableTls();
        if (tlsEnabled) {
            LoggerUtils.printIfErrorEnabled(LOGGER,
                    "current client is require tls encrypted ,server must support tls ,please check");
        }
        return null;
    }
}
1.3.2.4 服务端处理客户端发送的连接建立请求 ConnectionSetupRequest :
1)客户端发送连接成功建立请求:
客户端通过 serverCheck 请求获取到服务端返回的 connectId 之后,再通过双向流向服务端发送 ConnectionSetupRequest:
//send a setup request.
// Fill the request with the client version, labels, abilities and tenant before sending it.
ConnectionSetupRequest conSetupRequest = new ConnectionSetupRequest();
conSetupRequest.setClientVersion(VersionUtils.getFullClientVersion());
conSetupRequest.setLabels(super.getLabels());
conSetupRequest.setAbilities(super.clientAbilities);
conSetupRequest.setTenant(super.getTenant());
// Send the request on the connection (presumably over its payloadStreamObserver bi-directional stream - confirm).
grpcConn.sendRequest(conSetupRequest);
2)服务端接收到请求 GrpcBiStreamRequestAcceptor 双向流处理器:
/**
 * Server-side handler of the bi-directional request stream.
 *
 * <p>For each payload received from the client: a {@code ConnectionSetupRequest} creates a
 * {@code GrpcConnection} and registers it with the connection manager; a {@code Response} is
 * treated as the client's answer to a request sent by the server and is passed to the ack
 * synchronizer; any other payload is logged as unknown.
 *
 * <p>NOTE(review): this excerpt ends after {@code onError}; the remaining members of the observer
 * and the method's return statement are not visible here.
 *
 * @param responseObserver observer used to write to the client
 */
@Override
public StreamObserver<Payload> requestBiStream(StreamObserver<Payload> responseObserver) {
StreamObserver<Payload> streamObserver = new StreamObserver<Payload>() {
// Connection id, read from the gRPC context.
final String connectionId = GrpcServerConstants.CONTEXT_KEY_CONN_ID.get();
// Local (server) port, read from the gRPC context.
final Integer localPort = GrpcServerConstants.CONTEXT_KEY_CONN_LOCAL_PORT.get();
// Remote (client) port, read from the gRPC context.
final int remotePort = GrpcServerConstants.CONTEXT_KEY_CONN_REMOTE_PORT.get();
// Remote (client) ip, read from the gRPC context.
String remoteIp = GrpcServerConstants.CONTEXT_KEY_CONN_REMOTE_IP.get();
// Client ip as reported in the payload metadata; updated on every onNext call.
String clientIp = "";
@Override
public void onNext(Payload payload) {
// Client ip taken from the payload metadata.
clientIp = payload.getMetadata().getClientIp();
// Trace the request if tracing applies to it.
traceDetailIfNecessary(payload);
// Parse the payload into a request/response object.
Object parseObj;
try {
parseObj = GrpcUtils.parse(payload);
} catch (Throwable throwable) {
// A payload that cannot be parsed is logged and dropped.
Loggers.REMOTE_DIGEST
.warn("[{}]Grpc request bi stream,payload parse error={}", connectionId, throwable);
return;
}
if (parseObj == null) {
Loggers.REMOTE_DIGEST
.warn("[{}]Grpc request bi stream,payload parse null ,body={},meta={}", connectionId,
payload.getBody().getValue().toStringUtf8(), payload.getMetadata());
return;
}
if (parseObj instanceof ConnectionSetupRequest) {
// The client asks to set up the connection.
ConnectionSetupRequest setUpRequest = (ConnectionSetupRequest) parseObj;
Map<String, String> labels = setUpRequest.getLabels();
// Application name defaults to "-" when the labels do not carry one.
String appName = "-";
if (labels != null && labels.containsKey(Constants.APPNAME)) {
appName = labels.get(Constants.APPNAME);
}
// Connection meta data.
ConnectionMeta metaInfo = new ConnectionMeta(connectionId, payload.getMetadata().getClientIp(),
remoteIp, remotePort, localPort, ConnectionType.GRPC.getType(),
setUpRequest.getClientVersion(), appName, setUpRequest.getLabels());
metaInfo.setTenant(setUpRequest.getTenant());
// Create the connection object around the response observer and the channel from the context.
GrpcConnection connection = new GrpcConnection(metaInfo, responseObserver,
GrpcServerConstants.CONTEXT_KEY_CHANNEL.get());
// null if supported
// Abilities reported by the client.
if (setUpRequest.getAbilityTable() != null) {
// map to table
connection.setAbilityTable(setUpRequest.getAbilityTable());
}
// SDK connections are rejected while the server application has not finished starting.
boolean rejectSdkOnStarting = metaInfo.isSdkSource() && !ApplicationUtils.isStarted();
// Register (store) the client connection unless it has to be rejected.
if (rejectSdkOnStarting || !connectionManager.register(connectionId, connection)) {
//Not register to the connection manager if current server is over limit or server is starting.
try {
Loggers.REMOTE_DIGEST.warn("[{}]Connection register fail,reason:{}", connectionId,
rejectSdkOnStarting ? " server is not started" : " server is over limited.");
connection.close();
} catch (Exception e) {
//Do nothing.
if (connectionManager.traced(clientIp)) {
Loggers.REMOTE_DIGEST
.warn("[{}]Send connect reset request error,error={}", connectionId, e);
}
}
} else {
try {
// server sends abilities only when:
// 1. client sends setUpRequest with its abilities table
// 2. client sends setUpRequest with empty table
if (setUpRequest.getAbilityTable() != null) {
// finish register, tell client has set up successfully
// async response without client ack
// Tell the client that the connection was registered, together with the server abilities.
connection.sendRequestNoAck(new SetupAckRequest(NacosAbilityManagerHolder.getInstance()
.getCurrentNodeAbilities(AbilityMode.SERVER)));
}
} catch (Exception e) {
// nothing to do
}
}
} else if (parseObj instanceof Response) {
// The client's response to a request that was sent by the server.
Response response = (Response) parseObj;
if (connectionManager.traced(clientIp)) {
Loggers.REMOTE_DIGEST
.warn("[{}]Receive response of server request ,response={}", connectionId, response);
}
// Hand the response to the ack synchronizer and refresh the connection's active time.
RpcAckCallbackSynchronizer.ackNotify(connectionId, response);
connectionManager.refreshActiveTime(connectionId);
} else {
Loggers.REMOTE_DIGEST
.warn("[{}]Grpc request bi stream,unknown payload receive ,parseObj={}", connectionId,
parseObj);
}
}
@Override
public void onError(Throwable t) {
if (connectionManager.traced(clientIp)) {
Loggers.REMOTE_DIGEST.warn("[{}]Bi stream on error,error={}", connectionId, t);
}
// Complete the response stream unless the client has already cancelled it.
if (responseObserver instanceof ServerCallStreamObserver) {
ServerCallStreamObserver serverCallStreamObserver = ((ServerCallStreamObserver) responseObserver);
if (serverCallStreamObserver.isCancelled()) {
//client close the stream.
} else {
try {
serverCallStreamObserver.onCompleted();
} catch (Throwable throwable) {
//ignore
}
}
}
}
}
3) 客户端连接保存:
connectionManager.register
/**
 * Registers a new connection under its connection id.
 *
 * @param connectionId id of the connection
 * @param connection   connection to store
 * @return {@code true} if the connection is stored now or an entry with this id already exists;
 *         {@code false} if the connection is not connected or {@code checkLimit} rejects it
 */
public synchronized boolean register(String connectionId, Connection connection) {
    // Only connections whose channel is already established can be registered.
    if (!connection.isConnected()) {
        return false;
    }
    String remoteClientIp = connection.getMetaInfo().clientIp;
    // An id that is already present counts as registered.
    if (connections.containsKey(connectionId)) {
        return true;
    }
    // Connection limit check on the server side.
    if (checkLimit(connection)) {
        return false;
    }
    if (traced(remoteClientIp)) {
        connection.setTraced(true);
    }
    // Store the connection so that it can be used later to send data to the client.
    connections.put(connectionId, connection);
    // One machine may open several connections; count the connections per client ip.
    connectionForClientIp.computeIfAbsent(remoteClientIp, ip -> new AtomicInteger(0)).getAndIncrement();
    // Notify the registered listeners that a client has connected.
    clientConnectionEventListenerRegistry.notifyClientConnected(connection);
    LOGGER.info("new connection registered successfully, connectionId = {},connection={} ", connectionId,
            connection);
    return true;
}
二、服务信息获取(故障转移):
- 使用场景:
服务信息获取(故障转移)针对分布式环境:当客户端一段时间内没有和服务端进行通信时,客户端访问同一个命名空间且同一个分组下的其他服务,依然可以拿到对应服务的 ip 和端口;
- 实现方式:
每次客户端启动,都去加载本地缓存的 服务注册实例信息到内存中,当需要进行服务调用时可以方便的获取到服务信息
在 NacosNamingService 的 init(properties) 方法中,通过 ServiceInfoHolder 类来完成本地缓存的加载以及故障转移(FailoverReactor)的初始化:
2.1 ServiceInfoHolder
/**
 * Creates the holder of locally known service information.
 *
 * <p>Initializes the cache directory, optionally restores the service info map from the disk
 * cache, and creates the failover reactor.
 *
 * @param namespace          namespace used to initialize the cache directory
 * @param notifierEventScope event scope stored on this holder
 * @param properties         client properties controlling cache loading and empty-push protection
 */
public ServiceInfoHolder(String namespace, String notifierEventScope, NacosClientProperties properties) {
    // Resolve the local cache directory for this namespace.
    cacheDir = CacheDirUtil.initCacheDir(namespace, properties);
    if (isLoadCacheAtStart(properties)) {
        // Restore the service information from the files in the cache directory.
        this.serviceInfoMap = new ConcurrentHashMap<>(DiskCache.read(this.cacheDir));
    } else {
        this.serviceInfoMap = new ConcurrentHashMap<>(16);
    }
    // FailoverReactor(ServiceInfoHolder, String cacheDir) builds its failover directory from the second
    // argument, so it has to receive the cache directory rather than the event scope.
    this.failoverReactor = new FailoverReactor(this, cacheDir);
    this.pushEmptyProtection = isPushEmptyProtect(properties);
    this.notifierEventScope = notifierEventScope;
}
2.2 故障转移FailoverReactor:
/**
 * Creates the failover reactor and starts its scheduled tasks.
 *
 * @param serviceInfoHolder holder of the service information
 * @param cacheDir          cache directory; the failover directory is {@code cacheDir + FAILOVER_DIR}
 */
public FailoverReactor(ServiceInfoHolder serviceInfoHolder, String cacheDir) {
    this.serviceInfoHolder = serviceInfoHolder;
    this.failoverDir = cacheDir + FAILOVER_DIR;
    // Single-threaded scheduler whose worker is a named daemon thread.
    this.executorService = new ScheduledThreadPoolExecutor(1, task -> {
        Thread worker = new Thread(task);
        worker.setName("com.alibaba.nacos.naming.failover");
        worker.setDaemon(true);
        return worker;
    });
    // Schedule the failover tasks.
    this.init();
}
init() 对于服务实例的加载:
/**
 * Schedules the failover tasks on the reactor's executor.
 *
 * <p>Three tasks are scheduled: a {@code SwitchRefresher} every 5000 ms starting immediately, a
 * {@code DiskFileWriter} every {@code DAY_PERIOD_MINUTES} minutes after an initial 30 minutes, and
 * a one-off task after 10000 ms that writes a backup if the failover directory holds no files.
 */
public void init() {
    // Presumably re-reads the failover switch file on each run - confirm against SwitchRefresher.
    executorService.scheduleWithFixedDelay(new SwitchRefresher(), 0L, 5000L, TimeUnit.MILLISECONDS);
    executorService.scheduleWithFixedDelay(new DiskFileWriter(), 30, DAY_PERIOD_MINUTES, TimeUnit.MINUTES);
    // backup file on startup if failover directory is empty.
    executorService.schedule(() -> {
        try {
            File failoverFolder = new File(failoverDir);
            boolean folderUnavailable = !failoverFolder.exists() && !failoverFolder.mkdirs();
            if (folderUnavailable) {
                throw new IllegalStateException("failed to create cache dir: " + failoverDir);
            }
            File[] existing = failoverFolder.listFiles();
            boolean nothingBackedUp = existing == null || existing.length == 0;
            if (nothingBackedUp) {
                // No files in the failover directory yet: run the disk writer once right away.
                new DiskFileWriter().run();
            }
        } catch (Throwable e) {
            NAMING_LOGGER.error("[NA] failed to backup file on startup.", e);
        }
    }, 10000L, TimeUnit.MILLISECONDS);
}
总结
本文对客户端服务信息获取的故障转移细节,及客户端同服务端的grpc 连接建立进行介绍。