Continuing from the previous post, this one analyzes how the mgr is started.
How the mgr works
The mgr works in an event-driven model: it waits for events, handles each event as it arrives, returns the result, and then goes back to waiting.
It is responsible for tracking runtime metrics and the current state of the Ceph cluster, including storage utilization, current performance metrics, and system load. The Ceph Manager daemon also hosts Python-based modules that manage and expose Ceph cluster information, including a web-based dashboard and a REST API.
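To make the event-driven model concrete, here is a purely illustrative Go sketch (not actual ceph-mgr code, which is C++): the daemon blocks until an event arrives, handles it, and returns to waiting.

package main

import "fmt"

// Event is a stand-in for whatever the daemon waits on
// (mon map updates, module commands, metric reports, ...).
type Event struct{ Name string }

// eventLoop blocks until an event arrives, handles it, and goes back to
// waiting -- the wait/handle/wait cycle described above.
func eventLoop(events <-chan Event, done <-chan struct{}, finished chan<- struct{}) {
    defer close(finished)
    for {
        select {
        case e := <-events:
            fmt.Printf("handled event: %s\n", e.Name) // process and report the result
        case <-done:
            return
        }
    }
}

func main() {
    events := make(chan Event) // unbuffered: the send below blocks until handled
    done := make(chan struct{})
    finished := make(chan struct{})
    go eventLoop(events, done, finished)
    events <- Event{Name: "pg_stats_update"}
    close(done)
    <-finished
}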
Startup command
ceph-mgr --fsid=47913c99-d1ed-4a9b-bd55-2b49490b1229 --keyring=/etc/ceph/keyring-store/keyring --log-to-stderr=true --err-to-stderr=true --mon-cluster-log-to-stderr=true --log-stderr-prefix=debug --mon-host=[v2:10.200.113.13:3300,v1:10.200.113.13:6789] --mon-initial-members=a --id=a --foreground
1. The start function
Path: pkg/operator/ceph/cluster/mgr/mgr.go
1.1 The generateKeyring function
Invokes the command ceph auth get-or-create-key mgr.a mon allow * mds allow * osd allow * --connect-timeout=15 --cluster=rook-ceph --conf=/var/lib/rook/rook-ceph/rook-ceph.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/156575653 to generate the mgr key, then stores it as a Kubernetes secret via the CreateOrUpdate function:
func (c *Cluster) generateKeyring(m *mgrConfig) error {
    user := fmt.Sprintf("mgr.%s", m.DaemonID)
    // TODO: the access string here does not match the access from the keyring template. should they match?
    access := []string{"mon", "allow *", "mds", "allow *", "osd", "allow *"}
    // TODO: can we change this ownerref to be the deployment or service?
    s := keyring.GetSecretStore(c.context, c.Namespace, &c.ownerRef)

    key, err := s.GenerateKey(m.ResourceName, user, access)
    if err != nil {
        return err
    }

    keyring := fmt.Sprintf(keyringTemplate, m.DaemonID, key)
    return s.CreateOrUpdate(m.ResourceName, keyring)
}
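For reference, a sketch of the keyringTemplate the key is rendered into (based on the Rook source of this era; verify against your version). The caps here are what the first TODO above refers to: they are expressed differently from the access slice passed to GenerateKey.

// Sketch of the mgr keyring template (check your Rook version for the
// authoritative text). The %s placeholders are the daemon ID and the key.
const keyringTemplate = `
[mgr.%s]
	key = %s
	caps mon = "allow *"
	caps mds = "allow *"
	caps osd = "allow *"
`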
1.2 Creating the mgr deployment
// start the deployment
d := c.makeDeployment(mgrConfig)
logger.Debugf("starting mgr deployment: %+v", d)
_, err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Create(d)
if err != nil {
    if !errors.IsAlreadyExists(err) {
        return fmt.Errorf("failed to create mgr deployment %s. %+v", resourceName, err)
    }
    logger.Infof("deployment for mgr %s already exists. updating if needed", resourceName)
    if _, err := updateDeploymentAndWait(c.context, d, c.Namespace); err != nil {
        return fmt.Errorf("failed to update mgr deployment %s. %+v", resourceName, err)
    }
}
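updateDeploymentAndWait (in Rook's k8sutil package) applies the update and blocks until the rollout completes. A minimal sketch of that idiom, assuming the pre-1.17 client-go API used by the rest of this code; the real helper is more thorough:

package k8sutil

import (
    "fmt"
    "time"

    appsv1 "k8s.io/api/apps/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/kubernetes"
)

// updateAndWait is a simplified stand-in for updateDeploymentAndWait:
// push the new spec, then poll until the controller has observed the
// new generation and all replicas have been updated.
func updateAndWait(clientset kubernetes.Interface, d *appsv1.Deployment, namespace string) (*appsv1.Deployment, error) {
    updated, err := clientset.AppsV1().Deployments(namespace).Update(d)
    if err != nil {
        return nil, fmt.Errorf("failed to update deployment %s. %+v", d.Name, err)
    }
    err = wait.Poll(2*time.Second, 90*time.Second, func() (bool, error) {
        cur, err := clientset.AppsV1().Deployments(namespace).Get(updated.Name, metav1.GetOptions{})
        if err != nil {
            return false, err
        }
        rolledOut := cur.Status.ObservedGeneration >= updated.Generation &&
            cur.Status.UpdatedReplicas == cur.Status.Replicas
        return rolledOut, nil
    })
    return updated, err
}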
Ceph docs on the orchestrator modules: http://docs.ceph.com/docs/master/mgr/orchestrator_cli/
1.3 The configureOrchestratorModules function
Enables the mgr orchestrator CLI modules (requires at least the Nautilus release):
ceph mgr module enable orchestrator_cli --force --connect-timeout=15 --cluster=rook-ceph --conf=/var/lib/rook/rook-ceph/rook-ceph.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/566259392
ceph mgr module enable rook --force --connect-timeout=15 --cluster=rook-ceph --conf=/var/lib/rook/rook-ceph/rook-ceph.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/714929951
ceph orchestrator set backend rook --connect-timeout=15 --cluster=rook-ceph --conf=/var/lib/rook/rook-ceph/rook-ceph.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/525522418
// Ceph docs about the orchestrator modules: http://docs.ceph.com/docs/master/mgr/orchestrator_cli/
func (c *Cluster) configureOrchestratorModules() error {
    if err := client.MgrEnableModule(c.context, c.Namespace, orchestratorModuleName, true); err != nil {
        return fmt.Errorf("failed to enable mgr orchestrator module. %+v", err)
    }
    if err := client.MgrEnableModule(c.context, c.Namespace, rookModuleName, true); err != nil {
        return fmt.Errorf("failed to enable mgr rook module. %+v", err)
    }

    // retry a few times in the case that the mgr module is not ready to accept commands
    _, err := client.ExecuteCephCommandWithRetry(func() ([]byte, error) {
        args := []string{"orchestrator", "set", "backend", "rook"}
        return client.ExecuteCephCommand(c.context, c.Namespace, args)
    }, c.exitCode, 5, invalidArgErrorCode, orchestratorInitWaitTime)
    if err != nil {
        return fmt.Errorf("failed to set rook as the orchestrator backend. %+v", err)
    }

    return nil
}
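MgrEnableModule is a thin wrapper that produces exactly the `ceph mgr module enable ... --force` invocations logged above. A sketch of what it looks like, paraphrased from Rook's client package (verify against your version):

// Sketch of client.MgrEnableModule: build the `ceph mgr module enable`
// argument list and shell out through the shared command executor.
func MgrEnableModule(context *clusterd.Context, clusterName, name string, force bool) error {
    args := []string{"mgr", "module", "enable", name}
    if force {
        args = append(args, "--force")
    }
    _, err := ExecuteCephCommand(context, clusterName, args)
    return err
}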
1.4 Enabling the prometheus module
// Ceph docs about the prometheus module: http://docs.ceph.com/docs/master/mgr/prometheus/
func (c *Cluster) enablePrometheusModule(clusterName string) error {
    if err := client.MgrEnableModule(c.context, clusterName, prometheusModuleName, true); err != nil {
        return fmt.Errorf("failed to enable mgr prometheus module. %+v", err)
    }
    return nil
}
1.5 Configuring the dashboard (mainly creates the dashboard service)
if err := c.configureDashboard(mgrConfig); err != nil {
    logger.Errorf("failed to enable mgr dashboard. %+v", err)
}
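configureDashboard (in dashboard.go of the same package) roughly enables or disables the dashboard mgr module according to the cluster spec, then creates the Service that exposes port 8443 (visible in the pod spec below). A simplified sketch; the field and helper names (c.dashboard, makeDashboardService, MgrDisableModule) are assumptions paraphrased from the Rook source, and the real function also handles custom ports and SSL:

// Simplified sketch of the dashboard flow (assumed helper names).
func (c *Cluster) configureDashboardSketch(m *mgrConfig) error {
    if !c.dashboard.Enabled {
        // assumed counterpart of MgrEnableModule
        return client.MgrDisableModule(c.context, c.Namespace, "dashboard")
    }
    if err := client.MgrEnableModule(c.context, c.Namespace, "dashboard", true); err != nil {
        return fmt.Errorf("failed to enable mgr dashboard module. %+v", err)
    }
    // expose the dashboard (port 8443 in the pod spec) through a Service
    service := c.makeDashboardService(appName) // assumed service builder
    if _, err := c.context.Clientset.CoreV1().Services(c.Namespace).Create(service); err != nil {
        if !errors.IsAlreadyExists(err) {
            return fmt.Errorf("failed to create dashboard service. %+v", err)
        }
        logger.Infof("dashboard service already exists")
    }
    return nil
}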
1.6 Creating the metrics service
// create the metrics service
service := c.makeMetricsService(appName)
if _, err := c.context.Clientset.CoreV1().Services(c.Namespace).Create(service); err != nil {
    if !errors.IsAlreadyExists(err) {
        return fmt.Errorf("failed to create mgr service. %+v", err)
    }
    logger.Infof("mgr metrics service already exists")
} else {
    logger.Infof("mgr metrics service started")
}
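makeMetricsService builds a plain Service that selects the mgr pods and exposes the prometheus module's default port 9283 (the http-metrics container port in the pod spec below). A sketch with the labels simplified; the real builder uses shared pod-label helpers and sets the owner reference:

// Sketch of the metrics Service built above (simplified labels).
func (c *Cluster) makeMetricsServiceSketch(name string) *v1.Service {
    labels := map[string]string{
        "app":          "rook-ceph-mgr", // simplified; Rook derives these from shared helpers
        "rook_cluster": c.Namespace,
    }
    return &v1.Service{
        ObjectMeta: metav1.ObjectMeta{
            Name:      name,
            Namespace: c.Namespace,
            Labels:    labels,
        },
        Spec: v1.ServiceSpec{
            Selector: labels,
            Ports: []v1.ServicePort{{
                Name:     "http-metrics",
                Port:     9283, // default port of the mgr prometheus module
                Protocol: v1.ProtocolTCP,
            }},
        },
    }
}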
The mgr pod spawned by the deployment's ReplicaSet looks like this:
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: "2019-05-23T06:54:07Z"
generateName: rook-ceph-mgr-a-854659bc84-
labels:
app: rook-ceph-mgr
ceph_daemon_id: a
instance: a
mgr: a
pod-template-hash: 854659bc84
rook_cluster: rook-ceph
name: rook-ceph-mgr-a-854659bc84-h898p
namespace: rook-ceph
ownerReferences:
- apiVersion: apps/v1
blockOwnerDeletion: true
controller: true
kind: ReplicaSet
name: rook-ceph-mgr-a-854659bc84
uid: 9ca642fd-7d26-11e9-82d7-0800271c9f15
resourceVersion: "140850"
selfLink: /api/v1/namespaces/rook-ceph/pods/rook-ceph-mgr-a-854659bc84-h898p
uid: 8f7cf21d-7d27-11e9-bfb6-0800271c9f15
spec:
affinity: {}
containers:
- args:
- --fsid=dcef92d7-1f6a-4b9d-8ed0-0037d537d00b
- --keyring=/etc/ceph/keyring-store/keyring
- --log-to-stderr=true
- --err-to-stderr=true
- --mon-cluster-log-to-stderr=true
- '--log-stderr-prefix=debug '
- --mon-host=$(ROOK_CEPH_MON_HOST)
- --mon-initial-members=$(ROOK_CEPH_MON_INITIAL_MEMBERS)
- --id=a
- --foreground
command:
- ceph-mgr
env:
- name: CONTAINER_IMAGE
value: ceph/ceph:v13.2.3-20190410
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: POD_MEMORY_LIMIT
valueFrom:
resourceFieldRef:
divisor: "0"
resource: limits.memory
- name: POD_MEMORY_REQUEST
valueFrom:
resourceFieldRef:
divisor: "0"
resource: requests.memory
- name: POD_CPU_LIMIT
valueFrom:
resourceFieldRef:
divisor: "1"
resource: limits.cpu
- name: POD_CPU_REQUEST
valueFrom:
resourceFieldRef:
divisor: "0"
resource: requests.cpu
- name: ROOK_CEPH_MON_HOST
valueFrom:
secretKeyRef:
key: mon_host
name: rook-ceph-config
- name: ROOK_CEPH_MON_INITIAL_MEMBERS
valueFrom:
secretKeyRef:
key: mon_initial_members
name: rook-ceph-config
- name: ROOK_OPERATOR_NAMESPACE
value: rook-ceph
- name: ROOK_CEPH_CLUSTER_CRD_VERSION
value: v1
- name: ROOK_VERSION
value: v1.0.1
- name: ROOK_CEPH_CLUSTER_CRD_NAME
value: rook-ceph
image: ceph/ceph:v13.2.3-20190410
imagePullPolicy: IfNotPresent
name: mgr
ports:
- containerPort: 6800
name: mgr
protocol: TCP
- containerPort: 9283
name: http-metrics
protocol: TCP
- containerPort: 8443
name: dashboard
protocol: TCP
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/ceph
name: rook-ceph-config
readOnly: true
- mountPath: /etc/ceph/keyring-store/
name: rook-ceph-mgr-a-keyring
readOnly: true
- mountPath: /var/log/ceph
name: rook-ceph-log
- mountPath: /var/lib/ceph/mgr/ceph-a
name: ceph-daemon-data
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: rook-ceph-mgr-token-zmgrp
readOnly: true
dnsPolicy: ClusterFirst
enableServiceLinks: true
initContainers:
- args:
- --fsid=dcef92d7-1f6a-4b9d-8ed0-0037d537d00b
- --keyring=/etc/ceph/admin-keyring-store/keyring
- --log-to-stderr=true
- --err-to-stderr=true
- --mon-cluster-log-to-stderr=true
- '--log-stderr-prefix=debug '
- --mon-host=$(ROOK_CEPH_MON_HOST)
- --mon-initial-members=$(ROOK_CEPH_MON_INITIAL_MEMBERS)
- config
- set
- mgr.a
- mgr/dashboard/a/server_addr
- $(ROOK_POD_IP)
- --verbose
command:
- ceph
env:
- name: CONTAINER_IMAGE
value: ceph/ceph:v13.2.3-20190410
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: POD_MEMORY_LIMIT
valueFrom:
resourceFieldRef:
divisor: "0"
resource: limits.memory
- name: POD_MEMORY_REQUEST
valueFrom:
resourceFieldRef:
divisor: "0"
resource: requests.memory
- name: POD_CPU_LIMIT
valueFrom:
resourceFieldRef:
divisor: "1"
resource: limits.cpu
- name: POD_CPU_REQUEST
valueFrom:
resourceFieldRef:
divisor: "0"
resource: requests.cpu
- name: ROOK_CEPH_MON_HOST
valueFrom:
secretKeyRef:
key: mon_host
name: rook-ceph-config
- name: ROOK_CEPH_MON_INITIAL_MEMBERS
valueFrom:
secretKeyRef:
key: mon_initial_members
name: rook-ceph-config
- name: ROOK_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: ROOK_OPERATOR_NAMESPACE
value: rook-ceph
- name: ROOK_CEPH_CLUSTER_CRD_VERSION
value: v1
- name: ROOK_VERSION
value: v1.0.1
- name: ROOK_CEPH_CLUSTER_CRD_NAME
value: rook-ceph
image: ceph/ceph:v13.2.3-20190410
imagePullPolicy: IfNotPresent
name: init-set-dashboard-server-addr
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/ceph
name: rook-ceph-config
readOnly: true
- mountPath: /etc/ceph/keyring-store/
name: rook-ceph-mgr-a-keyring
readOnly: true
- mountPath: /var/log/ceph
name: rook-ceph-log
- mountPath: /var/lib/ceph/mgr/ceph-a
name: ceph-daemon-data
- mountPath: /etc/ceph/admin-keyring-store/
name: rook-ceph-admin-keyring
readOnly: true
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: rook-ceph-mgr-token-zmgrp
readOnly: true
- args:
- --fsid=dcef92d7-1f6a-4b9d-8ed0-0037d537d00b
- --keyring=/etc/ceph/admin-keyring-store/keyring
- --log-to-stderr=true
- --err-to-stderr=true
- --mon-cluster-log-to-stderr=true
- '--log-stderr-prefix=debug '
- --mon-host=$(ROOK_CEPH_MON_HOST)
- --mon-initial-members=$(ROOK_CEPH_MON_INITIAL_MEMBERS)
- config
- set
- mgr.a
- mgr/prometheus/a/server_addr
- $(ROOK_POD_IP)
- --verbose
command:
- ceph
env:
- name: CONTAINER_IMAGE
value: ceph/ceph:v13.2.3-20190410
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
- name: POD_MEMORY_LIMIT
valueFrom:
resourceFieldRef:
divisor: "0"
resource: limits.memory
- name: POD_MEMORY_REQUEST
valueFrom:
resourceFieldRef:
divisor: "0"
resource: requests.memory
- name: POD_CPU_LIMIT
valueFrom:
resourceFieldRef:
divisor: "1"
resource: limits.cpu
- name: POD_CPU_REQUEST
valueFrom:
resourceFieldRef:
divisor: "0"
resource: requests.cpu
- name: ROOK_CEPH_MON_HOST
valueFrom:
secretKeyRef:
key: mon_host
name: rook-ceph-config
- name: ROOK_CEPH_MON_INITIAL_MEMBERS
valueFrom:
secretKeyRef:
key: mon_initial_members
name: rook-ceph-config
- name: ROOK_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: ROOK_OPERATOR_NAMESPACE
value: rook-ceph
- name: ROOK_CEPH_CLUSTER_CRD_VERSION
value: v1
- name: ROOK_VERSION
value: v1.0.1
- name: ROOK_CEPH_CLUSTER_CRD_NAME
value: rook-ceph
image: ceph/ceph:v13.2.3-20190410
imagePullPolicy: IfNotPresent
name: init-set-prometheus-server-addr
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/ceph
name: rook-ceph-config
readOnly: true
- mountPath: /etc/ceph/keyring-store/
name: rook-ceph-mgr-a-keyring
readOnly: true
- mountPath: /var/log/ceph
name: rook-ceph-log
- mountPath: /var/lib/ceph/mgr/ceph-a
name: ceph-daemon-data
- mountPath: /etc/ceph/admin-keyring-store/
name: rook-ceph-admin-keyring
readOnly: true
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: rook-ceph-mgr-token-zmgrp
readOnly: true
nodeName: master-node
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: rook-ceph-mgr
serviceAccountName: rook-ceph-mgr
terminationGracePeriodSeconds: 30
volumes:
- configMap:
defaultMode: 420
items:
- key: ceph.conf
mode: 256
path: ceph.conf
name: rook-ceph-config
name: rook-ceph-config
- name: rook-ceph-mgr-a-keyring
secret:
defaultMode: 420
secretName: rook-ceph-mgr-a-keyring
- hostPath:
path: /var/lib/rook/rook-ceph/log
type: ""
name: rook-ceph-log
- emptyDir: {}
name: ceph-daemon-data
- name: rook-ceph-admin-keyring
secret:
defaultMode: 420
secretName: rook-ceph-admin-keyring
- name: rook-ceph-mgr-token-zmgrp
secret:
defaultMode: 420
secretName: rook-ceph-mgr-token-zmgrp
status:
conditions:
- lastProbeTime: null
lastTransitionTime: "2019-05-23T07:42:00Z"
status: "True"
type: Initialized
- lastProbeTime: null
lastTransitionTime: "2019-05-23T07:42:04Z"
status: "True"
type: Ready
- lastProbeTime: null
lastTransitionTime: "2019-05-23T07:42:04Z"
status: "True"
type: ContainersReady
- lastProbeTime: null
lastTransitionTime: "2019-05-23T06:54:08Z"
status: "True"
type: PodScheduled
containerStatuses:
- containerID: docker://518dc481835f0d06b5bc9b05846e0a4b47aaef9f95ac32fb382d215b89f93fd7
image: ceph/ceph:v13.2.3-20190410
imageID: docker://sha256:fdb3585c96619a300dc2f153a3269c7b6e222adce9eed6ec199dc54302b9195a
lastState: {}
name: mgr
ready: true
restartCount: 2
state:
running:
startedAt: "2019-05-23T07:42:03Z"
hostIP: 192.168.74.57
initContainerStatuses:
- containerID: docker://00c13ba2ab4df98edd969ba7bf50ae61af771a58b2bba333c34f33506a51d867
image: ceph/ceph:v13.2.3-20190410
imageID: docker://sha256:fdb3585c96619a300dc2f153a3269c7b6e222adce9eed6ec199dc54302b9195a
lastState: {}
name: init-set-dashboard-server-addr
ready: true
restartCount: 2
state:
terminated:
containerID: docker://00c13ba2ab4df98edd969ba7bf50ae61af771a58b2bba333c34f33506a51d867
exitCode: 0
finishedAt: "2019-05-23T07:41:53Z"
reason: Completed
startedAt: "2019-05-23T07:41:40Z"
- containerID: docker://b43e61a9e183c2126cd3a6e862bddd72958f5a589f28e58d90f1ab6ceed54edf
image: ceph/ceph:v13.2.3-20190410
imageID: docker://sha256:fdb3585c96619a300dc2f153a3269c7b6e222adce9eed6ec199dc54302b9195a
lastState: {}
name: init-set-prometheus-server-addr
ready: true
restartCount: 0
state:
terminated:
containerID: docker://b43e61a9e183c2126cd3a6e862bddd72958f5a589f28e58d90f1ab6ceed54edf
exitCode: 0
finishedAt: "2019-05-23T07:41:59Z"
reason: Completed
startedAt: "2019-05-23T07:41:55Z"
phase: Running
podIP: 192.170.56.86
qosClass: BestEffort
startTime: "2019-05-23T06:54:10Z"
Summary:
The mgr startup flow is relatively simple: generate the keyring secret, create the deployment, enable the mgr modules (orchestrator, prometheus, dashboard), and create the metrics and dashboard services.