As analyzed in the previous section, the request finally reaches cinder-scheduler via RPC.
cinder-scheduler manages two tasks with taskflow (cinder/scheduler/flows/create_volume.py):
ExtractSchedulerSpecTask
This task mainly packages the parameters for the next task; it has no revert function.
def _populate_request_spec(self, volume, snapshot_id, image_id, backup_id):
# Create the full request spec using the volume object.
#
# NOTE(dulek): At this point, a volume can be deleted before it gets
# scheduled. If a delete API call is made, the volume gets instantly
        # deleted, and scheduling will fail when it tries to update the DB entry
# (with the host) in ScheduleCreateVolumeTask below.
volume_type_id = volume.volume_type_id
vol_type = volume.volume_type
return {
'volume_id': volume.id,
'snapshot_id': snapshot_id,
'image_id': image_id,
'backup_id': backup_id,
'volume_properties': {
'size': utils.as_int(volume.size, quiet=False),
'availability_zone': volume.availability_zone,
'volume_type_id': volume_type_id,
},
'volume_type': list(dict(vol_type).items()),
}
def execute(self, context, request_spec, volume, snapshot_id,
image_id, backup_id):
# For RPC version < 1.2 backward compatibility
if request_spec is None:
request_spec = self._populate_request_spec(volume,
snapshot_id, image_id,
backup_id)
return {
'request_spec': request_spec,
}
ScheduleCreateVolumeTask (important)
def execute(self, context, request_spec, filter_properties, volume):
try:
self.driver_api.schedule_create_volume(context, request_spec,
filter_properties)
except Exception as e:
self.message_api.create(
context,
message_field.Action.SCHEDULE_ALLOCATE_VOLUME,
resource_uuid=request_spec['volume_id'],
exception=e)
# An error happened, notify on the scheduler queue and log that
# this happened and set the volume to errored out and reraise the
# error *if* exception caught isn't NoValidBackend. Otherwise *do
# not* reraise (since what's the point?)
with excutils.save_and_reraise_exception(
reraise=not isinstance(e, exception.NoValidBackend)):
try:
self._handle_failure(context, request_spec, e)
finally:
common.error_out(volume, reason=e)
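For reference, get_flow() in the same file chains these two tasks into a linear flow and hands it to a taskflow engine. A simplified sketch of the pattern (store keys abbreviated; see get_flow() for the exact wiring):
from taskflow import engines
from taskflow.patterns import linear_flow

# Chain the tasks: ExtractSchedulerSpecTask provides 'request_spec',
# which ScheduleCreateVolumeTask then consumes.
flow = linear_flow.Flow('volume_create_scheduler')
flow.add(ExtractSchedulerSpecTask(rebind={'request_spec': 'raw_request_spec'}),
         ScheduleCreateVolumeTask(driver_api))

# The store seeds the flow's inputs; engine.run() executes the tasks in order.
engine = engines.load(flow, store={'context': context,
                                   'raw_request_spec': request_spec,
                                   'volume': volume,
                                   'snapshot_id': snapshot_id,
                                   'image_id': image_id,
                                   'backup_id': backup_id,
                                   'filter_properties': filter_properties})
engine.run()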
Note: the scheduler driver is initialized at service startup; which driver class to use is set by the scheduler_driver option in the configuration file:
scheduler_driver_opt = cfg.StrOpt('scheduler_driver',
default='cinder.scheduler.filter_scheduler.'
'FilterScheduler',
help='Default scheduler driver to use')
class SchedulerManager(manager.CleanableManager, manager.Manager):
"""Chooses a host to create volumes."""
RPC_API_VERSION = scheduler_rpcapi.SchedulerAPI.RPC_API_VERSION
target = messaging.Target(version=RPC_API_VERSION)
def __init__(self, scheduler_driver=None, service_name=None,
*args, **kwargs):
if not scheduler_driver:
scheduler_driver = CONF.scheduler_driver
self.driver = importutils.import_object(scheduler_driver)
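Note that importutils.import_object (from oslo.utils) imports the dotted class path and instantiates it in one step; a minimal standalone illustration using the default value above:
from oslo_utils import importutils

# Equivalent to importing FilterScheduler and calling it with no arguments.
driver = importutils.import_object(
    'cinder.scheduler.filter_scheduler.FilterScheduler')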
self.driver_api.schedule_create_volume is implemented in cinder/scheduler/filter_scheduler.py:
def schedule_create_volume(self, context, request_spec, filter_properties):
        # Key function: find a suitable backend based on the request info
backend = self._schedule(context, request_spec, filter_properties)
if not backend:
raise exception.NoValidBackend(reason=_("No weighed backends "
"available"))
backend = backend.obj
volume_id = request_spec['volume_id']
        # Update the volume's backend information in the database
updated_volume = driver.volume_update_db(
context, volume_id,
backend.host,
backend.cluster_name,
availability_zone=backend.service['availability_zone'])
self._post_select_populate_filter_properties(filter_properties,
backend)
# context is not serializable
filter_properties.pop('context', None)
        # RPC call to cinder-volume to create the volume
self.volume_rpcapi.create_volume(context, updated_volume, request_spec,
filter_properties,
allow_reschedule=True)
The _schedule function:
def _schedule(self, context, request_spec, filter_properties=None):
        # Get the backends that pass filtering and weighing for this request
weighed_backends = self._get_weighted_candidates(context, request_spec,
filter_properties)
# When we get the weighed_backends, we clear those backends that don't
        # match the resource's backend (it could be assigned from group,
# snapshot or volume).
        # Compared against resource_backend, for volumes created from a snapshot or from another volume
resource_backend = request_spec.get('resource_backend')
if weighed_backends and resource_backend:
resource_backend_has_pool = bool(utils.extract_host(
resource_backend, 'pool'))
# Get host name including host@backend#pool info from
# weighed_backends.
for backend in weighed_backends[::-1]:
backend_id = (
backend.obj.backend_id if resource_backend_has_pool
else utils.extract_host(backend.obj.backend_id)
)
if backend_id != resource_backend:
weighed_backends.remove(backend)
        # Return None if no weighed backend remains
if not weighed_backends:
LOG.warning('No weighed backend found for volume '
'with properties: %s',
filter_properties['request_spec'].get('volume_type'))
return None
        # Choose the top-ranked backend
return self._choose_top_backend(weighed_backends, request_spec)
The _get_weighted_candidates function:
def _get_weighted_candidates(self, context, request_spec,
filter_properties=None):
"""Return a list of backends that meet required specs.
Returned list is ordered by their fitness.
"""
elevated = context.elevated()
        # Since Cinder is using mixed filters from Oslo and its own, which
# takes 'resource_XX' and 'volume_XX' as input respectively, copying
# 'volume_XX' to 'resource_XX' will make both filters happy.
volume_type = request_spec.get("volume_type")
resource_type = volume_type if volume_type is not None else {}
        # Parameter handling; some code omitted
        ......
# Find our local list of acceptable backends by filtering and
# weighing our options. we virtually consume resources on
# it so subsequent selections can adjust accordingly.
# Note: remember, we are using an iterator here. So only
# traverse this list once.
        # First get all backends: entries in the service table with
        # topic=cinder-volume that are not disabled and whose state is up
        # (the same list shown by `cinder service-list`)
backends = self.host_manager.get_all_backend_states(elevated)
        # Important: filter the backends against the filter properties
# Filter local hosts based on requirements ...
backends = self.host_manager.get_filtered_backends(backends,
filter_properties)
if not backends:
return []
LOG.debug("Filtered %s", backends)
# weighted_backends = WeightedHost() ... the best
# backend for the job.
weighed_backends = self.host_manager.get_weighed_backends(
backends, filter_properties)
return weighed_backends
self.host_manager is implemented in cinder/scheduler/host_manager.py.
Look at get_filtered_backends (returns only the backends that pass all filters):
def get_filtered_backends(self, backends, filter_properties,
filter_class_names=None):
"""Filter backends and return only ones passing all filters."""
if filter_class_names is not None:
            # If filter classes are specified explicitly, use them directly
filter_classes = self._choose_backend_filters(filter_class_names)
else:
            # Otherwise use the filters enabled in the configuration (three by default)
filter_classes = self.enabled_filters
return self.filter_handler.get_filtered_objects(filter_classes,
backends,
filter_properties)
==========================================
self.enabled_filters is loaded from the configuration when HostManager is initialized:
self.enabled_filters = self._choose_backend_filters(
CONF.scheduler_default_filters)
==========================================
The default scheduler_default_filters configuration lists three filters:
host_manager_opts = [
cfg.ListOpt('scheduler_default_filters',
default=[
'AvailabilityZoneFilter',
'CapacityFilter',
'CapabilitiesFilter'
],
help='Which filter class names to use for filtering hosts '
'when not specified in the request.'),
cfg.ListOpt('scheduler_default_weighers',
default=[
'CapacityWeigher'
],
help='Which weigher class names to use for weighing hosts.'),
cfg.StrOpt('scheduler_weight_handler',
default='cinder.scheduler.weights.OrderedHostWeightHandler',
help='Which handler to use for selecting the host/pool '
'after weighing'),
]
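To change the behavior, these options can be overridden in cinder.conf; an illustrative snippet (the values shown simply restate the defaults):
[DEFAULT]
scheduler_default_filters = AvailabilityZoneFilter,CapacityFilter,CapabilitiesFilter
scheduler_default_weighers = CapacityWeigher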
self.filter_handler.get_filtered_objects is in cinder/scheduler/base_filter.py:
def get_filtered_objects(self, filter_classes, objs,
filter_properties, index=0):
"""Get objects after filter
:param filter_classes: filters that will be used to filter the
objects
:param objs: objects that will be filtered
:param filter_properties: client filter properties
:param index: This value needs to be increased in the caller
function of get_filtered_objects when handling
each resource.
"""
list_objs = list(objs)
LOG.debug("Starting with %d host(s)", len(list_objs))
part_filter_results = []
full_filter_results = []
        # Iterate over the filter classes; by default these three:
        # 'cinder.scheduler.filters.availability_zone_filter.AvailabilityZoneFilter'
        # 'cinder.scheduler.filters.capacity_filter.CapacityFilter'
        # 'cinder.scheduler.filters.capabilities_filter.CapabilitiesFilter'
for filter_cls in filter_classes:
cls_name = filter_cls.__name__
start_count = len(list_objs)
filter_class = filter_cls()
if filter_class.run_filter_for_index(index):
                # Call filter_all to filter the remaining backends and keep those that pass
objs = filter_class.filter_all(list_objs, filter_properties)
if objs is None:
LOG.info("Filter %s returned 0 hosts", cls_name)
full_filter_results.append((cls_name, None))
list_objs = None
break
list_objs = list(objs)
end_count = len(list_objs)
part_filter_results.append((cls_name, start_count, end_count))
remaining = [getattr(obj, "host", obj)
for obj in list_objs]
full_filter_results.append((cls_name, remaining))
LOG.debug("Filter %(cls_name)s returned "
"%(obj_len)d host(s)",
{'cls_name': cls_name, 'obj_len': len(list_objs)})
if not list_objs:
self._log_filtration(full_filter_results,
part_filter_results, filter_properties)
return list_objs
=============================================================================
Inheritance hierarchy of the filter classes:
AvailabilityZoneFilter (and the other filters) -> BaseBackendFilter -> BaseFilter
class AvailabilityZoneFilter(filters.BaseBackendFilter):
class BaseBackendFilter(base_filter.BaseFilter):
The filter_all method is implemented in the BaseFilter parent class:
class BaseFilter(object):
"""Base class for all filter classes."""
def _filter_one(self, obj, filter_properties):
"""Return True if it passes the filter, False otherwise.
Override this in a subclass.
"""
return True
def filter_all(self, filter_obj_list, filter_properties):
"""Yield objects that pass the filter.
Can be overridden in a subclass, if you need to base filtering
decisions on all objects. Otherwise, one can just override
_filter_one() to filter a single object.
"""
        # Simply iterates over the backends, calling _filter_one on each
for obj in filter_obj_list:
if self._filter_one(obj, filter_properties):
yield obj
The _filter_one method is implemented in BaseBackendFilter:
class BaseBackendFilter(base_filter.BaseFilter):
"""Base class for host filters."""
def _filter_one(self, obj, filter_properties):
"""Return True if the object passes the filter, otherwise False."""
# For backward compatibility with out of tree filters
        # Essentially calls host_passes or backend_passes; the concrete
        # backend_passes method is implemented in each subclass
passes_method = getattr(self, 'host_passes', self.backend_passes)
return passes_method(obj, filter_properties)
def backend_passes(self, host_state, filter_properties):
"""Return True if the HostState passes the filter, otherwise False.
Override this in a subclass.
"""
raise NotImplementedError()
Note: all three default filters inherit from BaseBackendFilter and implement their own backend_passes.
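To make the extension point concrete, here is a minimal sketch of a hypothetical custom filter (the class name and threshold are made up for illustration; it is not part of Cinder):
from cinder.scheduler import filters

class MinFreeGBFilter(filters.BaseBackendFilter):
    """Hypothetical: pass only backends with at least 100 GB free."""

    def backend_passes(self, backend_state, filter_properties):
        # free_capacity_gb can also be 'unknown' or 'infinite' depending on
        # the driver; a production filter must handle those values.
        free = backend_state.free_capacity_gb
        return isinstance(free, (int, float)) and free >= 100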
Next, each filter is analyzed in turn.
AvailabilityZoneFilter: cinder/scheduler/filters/availability_zone_filter.py
It filters backends by availability zone (AZ).
class AvailabilityZoneFilter(filters.BaseBackendFilter):
"""Filters Backends by availability zone."""
# Availability zones do not change within a request
run_filter_once_per_request = True
def backend_passes(self, backend_state, filter_properties):
spec = filter_properties.get('request_spec', {})
availability_zones = spec.get('availability_zones')
        # If AZs were requested, check whether this backend's AZ is among them
if availability_zones:
return (backend_state.service['availability_zone']
in availability_zones)
props = spec.get('resource_properties', {})
availability_zone = props.get('availability_zone')
if availability_zone:
return (availability_zone ==
backend_state.service['availability_zone'])
return True
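For example, requesting an AZ at creation time is what populates the spec this filter checks (illustrative CLI; 'nova' is the default AZ name in a stock deployment):
openstack volume create --availability-zone nova --size 10 demo-vol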
CapacityFilter: cinder/scheduler/filters/capacity_filter.py
It filters by capacity: the backend's free space must be able to hold the requested volume size.
The core code:
if free < requested_size:
LOG.warning("Insufficient free space for volume creation "
"on %(grouping)s %(grouping_name)s (requested / "
"avail): %(requested)s/%(available)s",
msg_args)
return False
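Note that `free` here is not the raw free space reported by the driver: the filter first sets aside the backend's reserved percentage. A simplified sketch of the computation (mirroring the logic in capacity_filter.py; the variables stand for fields of the backend state):
import math

# reserved_percentage comes from the driver's capability report.
reserved = float(reserved_percentage) / 100
free = free_space_gb - math.floor(total_space_gb * reserved)
# The filter then checks: free >= requested_size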
After all filters have run, get_weighed_backends is called to rank the remaining backends; the pattern is similar to the filters.
The weigher classes are configured via scheduler_default_weighers; the default is CapacityWeigher:
cinder/scheduler/weights/capacity.py
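The weigher interface mirrors the filter one: each weigher implements _weigh_object, and the weight handler normalizes and sums the scores. A minimal sketch of a hypothetical weigher (illustrative only; the real CapacityWeigher also handles thin provisioning and 'infinite'/'unknown' capacities):
from cinder.scheduler import weights

class SimpleFreeCapacityWeigher(weights.BaseHostWeigher):
    """Hypothetical: rank backends by free space, larger is better."""

    def _weigh_object(self, backend_state, weight_properties):
        # Raw score before normalization; higher ranks first.
        free = backend_state.free_capacity_gb
        return free if isinstance(free, (int, float)) else 0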
Continuing the flow: once a backend is chosen, an RPC message is sent to cinder-volume to create the volume.
The entry point where cinder-volume receives the request:
cinder/volume/manager.py: create_volume
@objects.Volume.set_workers
def create_volume(self, context, volume, request_spec=None,
filter_properties=None, allow_reschedule=True):
"""Creates the volume."""
        ......
try:
# NOTE(flaper87): Driver initialization is
# verified by the task itself.
flow_engine = create_volume.get_flow(
context_elevated,
self,
self.db,
self.driver,
self.scheduler_rpcapi,
self.host,
volume,
allow_reschedule,
context,
request_spec,
filter_properties,
image_volume_cache=self.image_volume_cache,
)
except Exception:
msg = _("Create manager volume flow failed.")
LOG.exception(msg, resource={'type': 'volume', 'id': volume.id})
raise exception.CinderException(msg)
......
cinder-volume likewise manages its tasks with taskflow (cinder/volume/flows/manager/create_volume.py):
volume_flow.add(ExtractVolumeSpecTask(db),
NotifyVolumeActionTask(db, "create.start"),
CreateVolumeFromSpecTask(manager,
db,
driver,
image_volume_cache),
CreateVolumeOnFinishTask(db, "create.end"))
ExtractVolumeSpecTask: packages the parameter information
NotifyVolumeActionTask: emits the volume.create.start notification
CreateVolumeFromSpecTask: creates the volume (the key task, detailed below)
CreateVolumeOnFinishTask: post-creation handling, including the create.end notification and database timestamp updates
====================================
CreateVolumeFromSpecTask details:
def execute(self, context, volume, volume_spec):
volume_spec = dict(volume_spec)
volume_id = volume_spec.pop('volume_id', None)
if not volume_id:
volume_id = volume.id
# we can't do anything if the driver didn't init
if not self.driver.initialized:
driver_name = self.driver.__class__.__name__
LOG.error("Unable to create volume. "
"Volume driver %s not initialized", driver_name)
raise exception.DriverNotInitialized()
......
        # Dispatch on the creation type: raw volume, from snapshot,
        # from source volume, from image, or from backup
create_type = volume_spec.pop('type', None)
LOG.info("Volume %(volume_id)s: being created as %(create_type)s "
"with specification: %(volume_spec)s",
{'volume_spec': volume_spec, 'volume_id': volume_id,
'create_type': create_type})
if create_type == 'raw':
model_update = self._create_raw_volume(
context, volume, **volume_spec)
elif create_type == 'snap':
model_update = self._create_from_snapshot(context, volume,
**volume_spec)
elif create_type == 'source_vol':
model_update = self._create_from_source_volume(
context, volume, **volume_spec)
elif create_type == 'image':
model_update = self._create_from_image(context,
volume,
**volume_spec)
elif create_type == 'backup':
model_update, need_update_volume = self._create_from_backup(
context, volume, **volume_spec)
volume_spec.update({'need_update_volume': need_update_volume})
else:
raise exception.VolumeTypeNotFound(volume_type_id=create_type)
1. Creating a raw volume
_create_raw_volume
def _create_raw_volume(self, context, volume, **kwargs):
        # Call the storage driver directly to create the volume
try:
ret = self.driver.create_volume(volume)
except Exception as ex:
with excutils.save_and_reraise_exception():
self.message.create(
context,
message_field.Action.CREATE_VOLUME_FROM_BACKEND,
resource_uuid=volume.id,
detail=message_field.Detail.DRIVER_FAILED_CREATE,
exception=ex)
finally:
self._cleanup_cg_in_volume(volume)
return ret
=======================================
The driver is selected via Cinder's configuration; the available drivers live under cinder/volume/drivers/.
Here we look at the basic Ceph driver, implemented in cinder/volume/drivers/rbd.py:
def create_volume(self, volume):
"""Creates a logical volume."""
if volume.encryption_key_id:
return self._create_encrypted_volume(volume, volume.obj_context)
size = int(volume.size) * units.Gi
LOG.debug("creating volume '%s'", volume.name)
chunk_size = self.configuration.rbd_store_chunk_size * units.Mi
order = int(math.log(chunk_size, 2))
vol_name = utils.convert_str(volume.name)
        # Straightforward: call Ceph's RBD API directly to create the image
with RADOSClient(self) as client:
self.RBDProxy().create(client.ioctx,
                                   vol_name,  # volume name (in Ceph, usually derived from the volume ID)
                                   size,  # volume size in bytes
order,
old_format=False,
features=client.features)
try:
            # Extra setup (e.g. enabling replication, setting replication_status, multiattach, etc.)
volume_update = self._setup_volume(volume)
except Exception:
            with excutils.save_and_reraise_exception():
                ......
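As a quick check on the order parameter computed above: RBD object sizes are powers of two, so with the default rbd_store_chunk_size of 4 (MiB):
import math

chunk_size = 4 * 1024 * 1024          # 4 MiB in bytes
order = int(math.log(chunk_size, 2))  # 22, i.e. RBD objects of 2**22 bytes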
2. _create_from_snapshot (creating a volume from a snapshot)
def _create_from_snapshot(self, context, volume, snapshot_id,
**kwargs):
        # Fetch the snapshot info
snapshot = objects.Snapshot.get_by_id(context, snapshot_id)
try:
            # Call the driver to create the volume from the snapshot
model_update = self.driver.create_volume_from_snapshot(volume,
snapshot)
finally:
self._cleanup_cg_in_volume(volume)
# NOTE(harlowja): Subtasks would be useful here since after this
# point the volume has already been created and further failures
# will not destroy the volume (although they could in the future).
make_bootable = False
try:
originating_vref = objects.Volume.get_by_id(context,
snapshot.volume_id)
make_bootable = originating_vref.bootable
except exception.CinderException as ex:
LOG.exception("Failed fetching snapshot %(snapshot_id)s bootable"
" flag using the provided glance snapshot "
"%(snapshot_ref_id)s volume reference",
{'snapshot_id': snapshot_id,
'snapshot_ref_id': snapshot.volume_id})
raise exception.MetadataUpdateFailure(reason=ex)
if make_bootable:
self._handle_bootable_volume_glance_meta(context, volume,
snapshot_id=snapshot_id)
return model_update
============================================================
The driver's create_volume_from_snapshot in cinder/volume/drivers/rbd.py:
def create_volume_from_snapshot(self, volume, snapshot):
"""Creates a volume from a snapshot."""
        # Creating a volume from a snapshot is really a storage-side clone; call the clone interface directly
volume_update = self._clone(volume, self.configuration.rbd_pool,
snapshot.volume_name, snapshot.name)
        # Ceph can be configured to flatten clones created from snapshots
if self.configuration.rbd_flatten_volume_from_snapshot:
self._flatten(self.configuration.rbd_pool, volume.name)
        # The clone starts with the same size as the snapshot's source volume, but a new size may be requested, so resize after the clone completes
if int(volume.size):
self._resize(volume)
self._show_msg_check_clone_v2_api(snapshot.volume_name)
return volume_update
3. _create_from_source_volume (creating a volume from another volume)
def _create_from_source_volume(self, context, volume, source_volid,
**kwargs):
        # Fetch the source volume from the database
srcvol_ref = objects.Volume.get_by_id(context, source_volid)
try:
            # Call the driver interface
model_update = self.driver.create_cloned_volume(volume, srcvol_ref)
if model_update is None:
model_update = {}
if volume.encryption_key_id is not None:
rekey_model_update = self._rekey_volume(context, volume)
model_update.update(rekey_model_update)
finally:
self._cleanup_cg_in_volume(volume)
# NOTE(harlowja): Subtasks would be useful here since after this
# point the volume has already been created and further failures
# will not destroy the volume (although they could in the future).
if srcvol_ref.bootable:
self._handle_bootable_volume_glance_meta(
context, volume, source_volid=srcvol_ref.id)
return model_update
===============================================================
create_cloned_volume: cinder/volume/drivers/rbd.py
def create_cloned_volume(self, volume, src_vref):
src_name = utils.convert_str(src_vref.name)
dest_name = utils.convert_str(volume.name)
clone_snap = "%s.clone_snap" % dest_name
# Do full copy if requested
        # rbd_max_clone_depth is the maximum allowed clone chain depth; 0 means cloning is disallowed, i.e. do a full copy
if self.configuration.rbd_max_clone_depth <= 0:
with RBDVolumeProxy(self, src_name, read_only=True) as vol:
                # Full volume copy
vol.copy(vol.ioctx, dest_name)
                # Resize (a new size may be requested when creating from a volume, too)
self._extend_if_required(volume, src_vref)
return
# Otherwise do COW clone.
        # Implemented via copy-on-write clone
with RADOSClient(self) as client:
src_volume = self.rbd.Image(client.ioctx, src_name)
LOG.debug("creating snapshot='%s'", clone_snap)
try:
# Create new snapshot of source volume
                # Take a temporary snapshot on the source volume
src_volume.create_snap(clone_snap)
src_volume.protect_snap(clone_snap)
# Now clone source volume snapshot
LOG.debug("cloning '%(src_vol)s@%(src_snap)s' to "
"'%(dest)s'",
{'src_vol': src_name, 'src_snap': clone_snap,
'dest': dest_name})
                # Clone from that snapshot
self.RBDProxy().clone(client.ioctx, src_name, clone_snap,
client.ioctx, dest_name,
features=client.features)
except Exception as e:
src_volume.unprotect_snap(clone_snap)
src_volume.remove_snap(clone_snap)
src_volume.close()
msg = (_("Failed to clone '%(src_vol)s@%(src_snap)s' to "
"'%(dest)s', error: %(error)s") %
{'src_vol': src_name,
'src_snap': clone_snap,
'dest': dest_name,
'error': e})
LOG.exception(msg)
raise exception.VolumeBackendAPIException(data=msg)
            # Get the current clone chain depth
depth = self._get_clone_depth(client, src_name)
# If dest volume is a clone and rbd_max_clone_depth reached,
# flatten the dest after cloning. Zero rbd_max_clone_depth means
# volumes are always flattened.
            # If this clone's depth reaches the configured maximum, no further clones may be made from it, so flatten it
if depth >= self.configuration.rbd_max_clone_depth:
LOG.info("maximum clone depth (%d) has been reached - "
"flattening dest volume",
self.configuration.rbd_max_clone_depth)
# Flatten destination volume
try:
with RBDVolumeProxy(self, dest_name, client=client,
ioctx=client.ioctx) as dest_volume:
LOG.debug("flattening dest volume %s", dest_name)
                    # Flatten it
dest_volume.flatten()
except Exception as e:
msg = (_("Failed to flatten volume %(volume)s with "
"error: %(error)s.") %
{'volume': dest_name,
'error': e})
LOG.exception(msg)
src_volume.close()
raise exception.VolumeBackendAPIException(data=msg)
try:
# remove temporary snap
LOG.debug("remove temporary snap %s", clone_snap)
                # Unprotect and delete the temporary snapshot
src_volume.unprotect_snap(clone_snap)
src_volume.remove_snap(clone_snap)
except Exception as e:
msg = (_("Failed to remove temporary snap "
"%(snap_name)s, error: %(error)s") %
{'snap_name': clone_snap,
'error': e})
LOG.exception(msg)
src_volume.close()
raise exception.VolumeBackendAPIException(data=msg)
try:
volume_update = self._setup_volume(volume)
except Exception:
self.RBDProxy().remove(client.ioctx, dest_name)
src_volume.unprotect_snap(clone_snap)
src_volume.remove_snap(clone_snap)
err_msg = (_('Failed to enable image replication'))
raise exception.ReplicationError(reason=err_msg,
volume_id=volume.id)
finally:
src_volume.close()
self._extend_if_required(volume, src_vref)
LOG.debug("clone created successfully")
4. _create_from_image (creating a volume from an image)
To be covered in a later section.

This article has walked through the volume scheduling and creation flow in OpenStack Cinder: from the request arriving at cinder-scheduler, through the taskflow-managed scheduling tasks, filter and weigher application, and the RPC hand-off, to cinder-volume driving the backend (here, the RBD driver) to actually create the volume.