python 64式: 第29式、分布式锁与群组管理__4、tooz之一致性哈希,群组管理源码分析

本文通过调试分析tooz 1.57.4版本,探讨了如何使用Redis作为后端实现一致性哈希和群组管理。详细分析了`coordination.get_coordinator`方法和`join_partitioned_group`的实现,展示了Redis中组和成员的存储结构。此外,还介绍了定时任务`checkGroupStatus`如何监控群组状态变化。tooz在大规模部署环境中,结合其负载均衡和分布式锁功能,能有效提升服务的水平扩展能力。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

目标:
通过调试分析现有的tooz是如何实现一致性哈希和群组管理的,具体到redis作为后端实现的具体细节。

0 调试环境
本地环境调试版本:
pip list|grep tooz
tooz (1.57.4)

社区tooz代码在:
https://github.com/openstack/tooz
git clone https://github.com/openstack/tooz.git
git checkout -b tooz_comment_v1_1_57 1.57.4
切取指定tag的代码参考:
git checkout -b branch_name tag_name

结论:
发现tooz 1.43.2版本中没有hashring的代码,但是1.57.4是包含hashring的代码。


1 调试创建群组/加入群组部分代码
class PartionCoordinator(object):
    """Coordinate partition membership through a tooz coordinator.

    NOTE(review): the public class name keeps the original (misspelled)
    identifier ``PartionCoordinator`` so existing callers keep working.
    """

    # Group id under which this member joins the partitioned group.
    PARTION_NAMESPACE = "sky"

    def __init__(self, memberId=None, backendUrl=None):
        """Initialize the coordinator.

        :param memberId: unique member id (ascii bytes expected by tooz);
                         a random UUID is generated when omitted
        :param backendUrl: tooz backend URL, e.g. 'redis://localhost:6379/'
        """
        self._coordinator = None
        self._groups = set()
        # BUG FIX: uuid.UUID has no .encode() method, so the original
        # str(uuid.uuid4().encode('ascii')) raised AttributeError.
        # Stringify the UUID first, then encode to the ascii bytes that
        # tooz documents for member_id.
        self._memberId = memberId or str(uuid.uuid4()).encode('ascii')
        self._backendUrl = backendUrl
        self._group = None
        self.coordinator = coordination.get_coordinator(
            self._backendUrl, self._memberId
        )
        # Twelve logical partitions to distribute across group members.
        self.partitionSet = list(range(12))
        # Last observed hash-ring membership; None until first refresh.
        self.groupState = None

分析:
1.1 coordination.get_coordinator方法
调用
/usr/lib/python2.7/site-packages/tooz/coordination.py(768)get_coordinator()
具体代码如下:
768    def get_coordinator(backend_url, member_id,
769                          characteristics=frozenset(), **kwargs):
770          """Initialize and load the backend.
771      
772          :param backend_url: the backend URL to use
773          :type backend: str
774          :param member_id: the id of the member
775          :type member_id: ascii bytes
776          :param characteristics: set
777          :type characteristics: set of :py:class:`.Characteristics` that will
778                                 be matched to the requested driver (this **will**
779                                 become a **required** parameter in a future tooz
780                                 version)
781          :param kwargs: additional coordinator options (these take precedence over
782                         options of the **same** name found in the ``backend_url``
783                         arguments query string)
784          """
785          parsed_url = netutils.urlsplit(backend_url)
786          parsed_qs = six.moves.urllib.parse.parse_qs(parsed_url.query)
787          if kwargs:
788              options = {}
789              for (k, v) in six.iteritems(kwargs):
790                  options[k] = [v]
791              for (k, v) in six.iteritems(parsed_qs):
792                  if k not in options:
793                      options[k] = v
794          else:
795              options = parsed_qs
796          d = driver.DriverManager(
797              namespace=TOOZ_BACKENDS_NAMESPACE,
798              name=parsed_url.scheme,
799              invoke_on_load=True,
800              invoke_args=(member_id, parsed_url, options)).driver
801          characteristics = set(characteristics)
802          driver_characteristics = set(getattr(d, 'CHARACTERISTICS', set()))
803          missing_characteristics = characteristics - driver_characteristics
804          if missing_characteristics:
805              raise ToozDriverChosenPoorly("Desired characteristics %s"
806                                           " is not a strict subset of driver"
807                                           " characteristics %s, %s"
808                                           " characteristics were not found"
809                                           % (characteristics,
810                                              driver_characteristics,
811                                              missing_characteristics))
812          return d


分析:
1.1.1 入参分析
(Pdb) p backend_url
'redis://localhost:6379/'
(Pdb) p member_id
'node-3000'
(Pdb) p characteristics
frozenset([])
(Pdb) p kwargs
{}

1.1.2 代码分析
(Pdb) p parsed_url
SplitResult(scheme='redis', netloc='localhost:6379', path='/', query='', fragment='')
(Pdb) p type(parsed_url)
<class 'oslo_utils.netutils._ModifiedSplitResult'>
(Pdb) p parsed_url.query
''
(Pdb) p parsed_qs
{}
(Pdb) p options
{}
(Pdb) p TOOZ_BACKENDS_NAMESPACE
'tooz.backends'
(Pdb) p parsed_url.scheme
'redis'


1.1.3 
796          d = driver.DriverManager(
797            namespace=TOOZ_BACKENDS_NAMESPACE,
798              name=parsed_url.scheme,
799              invoke_on_load=True,
800              invoke_args=(member_id, parsed_url, options)).driver

分析:
1.1.3.1 调用结果
(Pdb) p d
<tooz.drivers.redis.RedisDriver object at 0x7fa6ccbbcb10>
(Pdb) p type(d)
<class 'tooz.drivers.redis.RedisDriver'>
(Pdb) p d.__dict__
{'heart': <tooz.coordination.Heart object at 0x7fa6ccbbcb90>, '_namespace': '_tooz', '_executor': <tooz.utils.ProxyExecutor object at 0x7fa6ccbbcb50>, '_encoding': 'utf8', '_acquired_locks': set([]), '_joined_groups': set([]), 'lock_timeout': 30, '_hooks_join_group': defaultdict(<class 'tooz.coordination.Hooks'>, {}), '_group_prefix': '_tooz_group', '_client': None, '_started': False, '_groups': '_tooz_groups', '_scripts': {}, '_hooks_elected_leader': defaultdict(<class 'tooz.coordination.Hooks'>, {}), '_member_id': 'node-3000', '_server_info': {}, '_beat_prefix': '_tooz_beats', '_options': {}, '_parsed_url': SplitResult(scheme='redis', netloc='localhost:6379', path='/', query='', fragment=''), 'requires_beating': True, '_group_members': defaultdict(<type 'set'>, {}), 'timeout': 30, '_hooks_leave_group': defaultdict(<class 'tooz.coordination.Hooks'>, {}), 'membership_timeout': 30.0}

1.2 回到代码继续分析
class PartionCoordinator(object):
    """Coordinate partition membership through a tooz coordinator.

    NOTE(review): the public class name keeps the original (misspelled)
    identifier ``PartionCoordinator`` so existing callers keep working.
    """

    # Group id under which this member joins the partitioned group.
    PARTION_NAMESPACE = "sky"

    def __init__(self, memberId=None, backendUrl=None):
        """Initialize the coordinator.

        :param memberId: unique member id (ascii bytes expected by tooz);
                         a random UUID is generated when omitted
        :param backendUrl: tooz backend URL, e.g. 'redis://localhost:6379/'
        """
        self._coordinator = None
        self._groups = set()
        # BUG FIX: uuid.UUID has no .encode() method, so the original
        # str(uuid.uuid4().encode('ascii')) raised AttributeError.
        # Stringify the UUID first, then encode to the ascii bytes that
        # tooz documents for member_id.
        self._memberId = memberId or str(uuid.uuid4()).encode('ascii')
        self._backendUrl = backendUrl
        self._group = None
        self.coordinator = coordination.get_coordinator(
            self._backendUrl, self._memberId
        )
        # Twelve logical partitions to distribute across group members.
        self.partitionSet = list(range(12))
        # Last observed hash-ring membership; None until first refresh.
        self.groupState = None

分析:
1.2.1 协调器结果分析
(Pdb) p self.coordinator
<tooz.drivers.redis.RedisDriver object at 0x7fa6ccbbcb10>
(Pdb) p type(self.coordinator)
<class 'tooz.drivers.redis.RedisDriver'>
(Pdb) p self.coordinator.__dict__
{'heart': <tooz.coordination.Heart object at 0x7fa6ccbbcb90>, '_namespace': '_tooz', '_executor': <tooz.utils.ProxyExecutor object at 0x7fa6ccbbcb50>, '_encoding': 'utf8', '_acquired_locks': set([]), '_joined_groups': set([]), 'lock_timeout': 30, '_hooks_join_group': defaultdict(<class 'tooz.coordination.Hooks'>, {}), '_group_prefix': '_tooz_group', '_client': None, '_started': False, '_groups': '_tooz_groups', '_scripts': {}, '_hooks_elected_leader': defaultdict(<class 'tooz.coordination.Hooks'>, {}), '_member_id': 'node-3000', '_server_info': {}, '_beat_prefix': '_tooz_beats', '_options': {}, '_parsed_url': SplitResult(scheme='redis', netloc='localhost:6379', path='/', query='', fragment=''), 'requires_beating': True, '_group_members': defaultdict(<type 'set'>, {}), 'timeout': 30, '_hooks_leave_group': defaultdict(<class 'tooz.coordination.Hooks'>, {}), 'membership_timeout': 30.0}

1.2.2 其他
(Pdb) p self.partitionSet
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]


2 分析协调器处理代码
    def process(self):
        """Start the coordinator, join the partitioned group, and launch a
        background periodic task that rebalances when membership changes.
        """
        # Start the driver; start_heart=True also starts the heartbeat thread.
        self.coordinator.start(start_heart=True)
        # Join the group and get back a consistent hash ring over its members.
        self.hashring = self.coordinator.join_partitioned_group(
            self.PARTION_NAMESPACE
        )

        @periodics.periodic(spacing=300, run_immediately=True)
        def checkGroupStatus():
            # Fire join/leave watcher callbacks so the hash ring is updated.
            self.coordinator.run_watchers()
            # On any membership change, snapshot the new node set and
            # redistribute work across the surviving members.
            if self.groupState != self.hashring.ring.nodes:
                self.groupState = self.hashring.ring.nodes.copy()
                # TODO()
                self.refreshService()

        # Periodic worker backed by a thread pool; tasks are added below.
        self.periodic = periodics.PeriodicWorker.create(
            [],
            executor_factory=lambda: futures.ThreadPoolExecutor(max_workers=10))

        self.periodic.add(checkGroupStatus)
        # Run the worker loop in its own thread so process() returns promptly.
        spawnThread(self.periodic.start)

分析:
2.1) 分析
self.coordinator.start(start_heart=True)
进入:
/usr/lib/python2.7/site-packages/tooz/coordination.py(685)start()
代码如下:
685        def start(self, start_heart=False):
686              self._executor.start()
687              super(CoordinationDriverWithExecutor, self).start(start_heart)

2.1.1 分析
(Pdb) p self._executor
<tooz.utils.ProxyExecutor object at 0x7fa6ccbbcb50>
(Pdb) p type(self._executor)
<class 'tooz.utils.ProxyExecutor'>
(Pdb) p self._executor.__dict__
{'started': False, 'internally_owned': True, 'default_executor_factory': <function <lambda> at 0x7fa6cd997410>, 'driver_name': 'RedisDriver', 'executor': None}

2.1.2 分析
self._executor.start()
进入:
/usr/lib/python2.7/site-packages/tooz/utils.py(92)start()
 92        def start(self):
 93              if self.started:
 94                  return
 95              self.executor = self.default_executor_factory()
 96              self.started = True

分析:
2.1.2.1
(Pdb) p self.started
False

2.1.2.2
self.executor = self.default_executor_factory()
进入:
/usr/lib/python2.7/site-packages/tooz/utils.py(58)<lambda>()
对应代码:
 56      class ProxyExecutor(object):
 57          KIND_TO_FACTORY = {
 58  ->            'threaded': (lambda:
 59                           futurist.ThreadPoolExecutor(max_workers=1)),
 60              'synchronous': lambda: futurist.SynchronousExecutor(),
 61          }
 62      
 63          # Provide a few common aliases...
 64          KIND_TO_FACTORY['thread'] = KIND_TO_FACTORY['threaded']
 65          KIND_TO_FACTORY['threading'] = KIND_TO_FACTORY['threaded']
 66          KIND_TO_FACTORY['sync'] = KIND_TO_FACTORY['synchronous']
 67      
 68          DEFAULT_KIND = 'threaded'
 69      
 70          def __init__(self, driver_name, default_executor_factory):
 71              self.default_executor_factory = default_executor_factory
 72              self.driver_name = driver_name
 73              self.started = False
 74              self.executor = None
 75              self.internally_owned = True


解释:
 57          KIND_TO_FACTORY = {
 58  ->            'threaded': (lambda:
 59                           futurist.ThreadPoolExecutor(max_workers=1)),
 60              'synchronous': lambda: futurist.SynchronousExecutor(),
 61          }
这里主要设置了几种工厂类型,主要就是线程池执行器和异步执行器


2.1.2.3 回到
 92        def start(self):
 93              if self.started:
 94                  return
 95              self.executor = self.default_executor_factory()
 96              self.started = True

分析:
(Pdb) p self.executor
<futurist._futures.ThreadPoolExecutor object at 0x7fa6ccbbced0>
(Pdb) p type(self.executor)
<class 'futurist._futures.ThreadPoolExecutor'>
(Pdb) p self.executor.__dict__
{'_shutdown_lock': <_RLock owner=None count=0>, '_shutdown': False, '_max_workers': 1, '_gatherer': <futurist._futures._Gatherer object at 0x7fa6ccbca050>, '_check_and_reject': <function <lambda> at 0x7fa6ccbc2b90>, '_work_queue': <Queue.Queue instance at 0x7fa6ccbc66c8>, '_workers': []}


2.2 回到代码继续分析
/usr/lib/python2.7/site-packages/tooz/coordination.py(685)start()
代码如下:
685        def start(self, start_heart=False):
686              self._executor.start()
687              super(CoordinationDriverWithExecutor, self).start(start_heart)

分析:
2.2.1 之前调用分析
self._executor.start()主要就是设置executor为
futurist._futures.ThreadPoolExecutor
并设置了一个启动标记started为True

2.2.2 分析
super(CoordinationDriverWithExecutor, self).start(start_heart)
进入:
/usr/lib/python2.7/site-packages/tooz/coordination.py(414)start()
代码如下:
414        def start(self, start_heart=False):
415              """Start the service engine.
416      
417              If needed, the establishment of a connection to the servers
418              is initiated.
419              """
420              if self._started:
421                  raise tooz.ToozError(
422                      "Can not start a driver which has not been stopped")
423              self._start()
424              if self.requires_beating and start_heart:
425                  self.heart.start()
426              self._started = True
427              # Tracks which group are joined
428              self._joined_groups = set()


分析:
2.2.2.1 
(Pdb) p self._started
False
调用这个方法
self._start()
进入:
/usr/lib/python2.7/site-packages/tooz/drivers/redis.py(433)_start()
代码如下:
433        def _start(self):
434              super(RedisDriver, self)._start()
435              try:
436                  self._client = self._make_client(self._parsed_url, self._options,
437                                                   self.timeout)
438              except exceptions.RedisError as e:
439                  utils.raise_with_cause(coordination.ToozConnectionError,
440                                         encodeutils.exception_to_unicode(e),
441                                         cause=e)
442              else:
443                  # Ensure that the server is alive and not dead, this does not
444                  # ensure the server will always be alive, but does insure that it
445                  # at least is alive once...
446                  with _translate_failures():
447                      self._server_info = self._client.info()
448                  # Validate we have a good enough redis version we are connected
449                  # to so that the basic set of features we support will actually
450                  # work (instead of blowing up).
451                  new_enough, redis_version = self._check_fetch_redis_version(
452                      self.MIN_VERSION)
453                  if not new_enough:
454                      raise tooz.NotImplemented("Redis version greater than or"
455                                                " equal to '%s' is required"
456                                                " to use this driver; '%s' is"
457                                                " being used which is not new"
458                                                " enough" % (self.MIN_VERSION,
459                                                             redis_version))
460                  tpl_params = {
461                      'group_existence_value': self.GROUP_EXISTS_VALUE,
462                      'group_existence_key': self.GROUP_EXISTS,
463                  }
464                  # For py3.x ensure these are unicode since the string template
465                  # replacement will expect unicode (and we don't want b'' as a
466                  # prefix which will happen in py3.x if this is not done).
467                  for (k, v) in six.iteritems(tpl_params.copy()):
468                      if isinstance(v, six.binary_type):
469                          v = v.decode('ascii')
470                      tpl_params[k] = v
471                  prepared_scripts = {}
472                  for name, raw_script_tpl in six.iteritems(self.SCRIPTS):
473                      script_tpl = string.Template(raw_script_tpl)
474                      script = script_tpl.substitute(**tpl_params)
475                      prepared_scripts[name] = self._client.register_script(script)
476                  self._scripts = prepared_scripts
477                  self.heartbeat()
478                  self._started = True

分析:
1) 调用了
super(RedisDriver, self)._start()
进入:
/usr/lib/python2.7/site-packages/tooz/coordination.py(430)_start()
代码如下:
430        def _start(self):
431              pass

也就是说RedisDriver的父类的_start方法实际没有执行任何东西

(Pdb) p self._parsed_url
SplitResult(scheme='redis', netloc='localhost:6379', path='/', query='', fragment='')

(Pdb) p self._options
{}

(Pdb) p self.timeout
30

调用了_make_client方法

2) 分析_make_client方法
进入:
/usr/lib/python2.7/site-packages/tooz/drivers/redis.py(384)_make_client()


384        @classmethod
385          def _make_client(cls, parsed_url, options, default_socket_timeout):
386              kwargs = {}
387              if parsed_url.hostname:
388                  kwargs['host'] = parsed_url.hostname
389                  if parsed_url.port:
390                      kwargs['port'] = parsed_url.port
391              else:
392                  if not parsed_url.path:
393                      raise ValueError("Expected socket path in parsed urls path")
394                  kwargs['unix_socket_path'] = parsed_url.path
395              if parsed_url.password:
396                  kwargs['password'] = parsed_url.password
397              for a in cls.CLIENT_ARGS:
398                  if a not in options:
399                      continue
400                  if a in cls.CLIENT_BOOL_ARGS:
401                      v = strutils.bool_from_string(options[a])
402                  elif a in cls.CLIENT_LIST_ARGS:
403                      v = options[a]
404                  elif a in cls.CLIENT_INT_ARGS:
405                      v = int(options[a])
406                  else:
407                      v = options[a]
408                  kwargs[a] = v
409              if 'socket_timeout' not in kwargs:
410                  kwargs['socket_timeout'] = default_socket_timeout
411 
412              # Ask the sentinel for the current master if there is a
413              # sentinel arg.
414              if 'sentinel' in kwargs:
415                  sentinel_hosts = [
416                      tuple(fallback.split(':'))
417                      for fallback in kwargs.get('sentinel_fallback', [])
418                  ]
419                  sentinel_hosts.insert(0, (kwargs['host'], kwargs['port']))
420                  sentinel_server = sentinel.Sentinel(
421                      sentinel_hosts,
422                      socket_timeout=kwargs['socket_timeout'])
423                  sentinel_name = kwargs['sentinel']
424                  del kwargs['sentinel']
425                  if 'sentinel_fallback' in kwargs:
426                      del kwargs['sentinel_fallback']
427                  master_client = sentinel_server.master_for(sentinel_name, **kwargs)
428                  # The master_client is a redis.StrictRedis using a
429                  # Sentinel managed connection pool.
430                  return master_client
431              return redis.StrictRedis(**kwargs)

分析:
入参分析
(Pdb) p cls
<class 'tooz.drivers.redis.RedisDriver'>
(Pdb) p parsed_url
SplitResult(scheme='redis', netloc='localhost:6379', path='/', query='', fragment='')
(Pdb) p type(cls)
<type 'type'>
(Pdb) p type(parsed_url)
<class 'oslo_utils.netutils._ModifiedSplitResult'>
(Pdb) p options
{}
(Pdb) p default_socket_timeout
30
(Pdb) p kwargs
{'socket_timeout': 30, 'host': 'localhost', 'port': 6379}

最后返回了:
redis.StrictRedis对象,即一个Redis客户端

总结:
这里实际上就是当tooz的driver使用redis,就初始化了redis的客户端,用于后续的操作

2.2.2.2)回到代码继续分析
/usr/lib/python2.7/site-packages/tooz/drivers/redis.py(433)_start()
代码如下:
433        def _start(self):
434              super(RedisDriver, self)._start()
435              try:
436                  self._client = self._make_client(self._parsed_url, self._options,
437                                                   self.timeout)
438              except exceptions.RedisError as e:
439                  utils.raise_with_cause(coordination.ToozConnectionError,
440                                         encodeutils.exception_to_unicode(e),
441                                         cause=e)
442              else:
443                  # Ensure that the server is alive and not dead, this does not
444                  # ensure the server will always be alive, but does insure that it
445                  # at least is alive once...
446                  with _translate_failures():
447                      self._server_info = self._client.info()
448                  # Validate we have a good enough redis version we are connected
449                  # to so that the basic set of features we support will actually
450                  # work (instead of blowing up).
451                  new_enough, redis_version = self._check_fetch_redis_version(
452                      self.MIN_VERSION)
453                  if not new_enough:
454                      raise tooz.NotImplemented("Redis version greater than or"
455                                                " equal to '%s' is required"
456                                                " to use this driver; '%s' is"
457                                                " being used which is not new"
458                                                " enough" % (self.MIN_VERSION,
459                                                             redis_version))
460                  tpl_params = {
461                      'group_existence_value': self.GROUP_EXISTS_VALUE,
462                      'group_existence_key': self.GROUP_EXISTS,
463                  }
464                  # For py3.x ensure these are unicode since the string template
465                  # replacement will expect unicode (and we don't want b'' as a
466                  # prefix which will happen in py3.x if this is not done).
467                  for (k, v) in six.iteritems(tpl_params.copy()):
468                      if isinstance(v, six.binary_type):
469                          v = v.decode('ascii')
470                      tpl_params[k] = v
471                  prepared_scripts = {}
472                  for name, raw_script_tpl in six.iteritems(self.SCRIPTS):
473                      script_tpl = string.Template(raw_script_tpl)
474                      script = script_tpl.substitute(**tpl_params)
475                      prepared_scripts[name] = self._client.register_script(script)
476                  self._scripts = prepared_scripts
477                  self.heartbeat()
478                  self._started = True

分析:
A) 
self._make_client(self._parsed_url, self._options, self.timeout)
实际返回了StrictRedis对象即Redis客户端,这里是因为配置了driver为Redis

(Pdb) p self._client
StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>
(Pdb) p type(self._client)
<class 'redis.client.StrictRedis'>

(Pdb) p tpl_params
{'group_existence_key': u'__created__', 'group_existence_value': u'1'}

B) 分析
467                  for (k, v) in six.iteritems(tpl_params.copy()):
468                      if isinstance(v, six.binary_type):
469                          v = v.decode('ascii')
470                      tpl_params[k] = v
471                  prepared_scripts = {}
472                  for name, raw_script_tpl in six.iteritems(self.SCRIPTS):
473                      script_tpl = string.Template(raw_script_tpl)
474                      script = script_tpl.substitute(**tpl_params)
475                      prepared_scripts[name] = self._client.register_script(script)

参数分析:
(Pdb) p self.SCRIPTS
{'delete_group': '\n-- Extract *all* the variables (so we can easily know what they are)...\nlocal namespaced_group_key = KEYS[1]\nlocal all_groups_key = KEYS[2]\nlocal no_namespaced_group_key = ARGV[1]\nif redis.call("exists", namespaced_group_key) == 0 then\n    return -1\nend\nif redis.call("sismember", all_groups_key, no_namespaced_group_key) == 0 then\n    return -2\nend\nif redis.call("hlen", namespaced_group_key) > 1 then\n    return -3\nend\n-- First remove from the set (then delete the group); if the set removal\n-- fails, at least the group will still exist (and can be fixed manually)...\nif redis.call("srem", all_groups_key, no_namespaced_group_key) == 0 then\n    return -4\nend\nredis.call("del", namespaced_group_key)\nreturn 1\n', 'create_group': '\n-- Extract *all* the variables (so we can easily know what they are)...\nlocal namespaced_group_key = KEYS[1]\nlocal all_groups_key = KEYS[2]\nlocal no_namespaced_group_key = ARGV[1]\nif redis.call("exists", namespaced_group_key) == 1 then\n    return 0\nend\nredis.call("sadd", all_groups_key, no_namespaced_group_key)\nredis.call("hset", namespaced_group_key,\n           "${group_existence_key}", "${group_existence_value}")\nreturn 1\n', 'update_capabilities': '\n-- Extract *all* the variables (so we can easily know what they are)...\nlocal group_key = KEYS[1]\nlocal member_id = ARGV[1]\nlocal caps = ARGV[2]\nif redis.call("exists", group_key) == 0 then\n    return -1\nend\nif redis.call("hexists", group_key, member_id) == 0 then\n    return -2\nend\nredis.call("hset", group_key, member_id, caps)\nreturn 1\n'}

最终:
(Pdb) p prepared_scripts
{'delete_group': <redis.client.Script object at 0x7ff8c5dbde50>, 'create_group': <redis.client.Script object at 0x7ff8c5df7690>, 'update_capabilities': <redis.client.Script object at 0x7ff8c5dbded0>}

p prepared_scripts['delete_group']
(Pdb) p prepared_scripts['delete_group']
<redis.client.Script object at 0x7ff8c5dbde50>
(Pdb) p type(prepared_scripts['delete_group'])
<class 'redis.client.Script'>
(Pdb) p prepared_scripts['delete_group'].__dict__
{'sha': 'e47a96214548e256ec64eb168ba17283d16099f1', 'registered_client': StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>, 'script': '\n-- Extract *all* the variables (so we can easily know what they are)...\nlocal namespaced_group_key = KEYS[1]\nlocal all_groups_key = KEYS[2]\nlocal no_namespaced_group_key = ARGV[1]\nif redis.call("exists", namespaced_group_key) == 0 then\n    return -1\nend\nif redis.call("sismember", all_groups_key, no_namespaced_group_key) == 0 then\n    return -2\nend\nif redis.call("hlen", namespaced_group_key) > 1 then\n    return -3\nend\n-- First remove from the set (then delete the group); if the set removal\n-- fails, at least the group will still exist (and can be fixed manually)...\nif redis.call("srem", all_groups_key, no_namespaced_group_key) == 0 then\n    return -4\nend\nredis.call("del", namespaced_group_key)\nreturn 1\n'}

p prepared_scripts['create_group']
(Pdb) p prepared_scripts['create_group']
<redis.client.Script object at 0x7ff8c5df7690>
(Pdb) p prepared_scripts['create_group'].__dict__
{'sha': 'c6ec40a30128b8d86488e3850e926d1c60118037', 'registered_client': StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>, 'script': u'\n-- Extract *all* the variables (so we can easily know what they are)...\nlocal namespaced_group_key = KEYS[1]\nlocal all_groups_key = KEYS[2]\nlocal no_namespaced_group_key = ARGV[1]\nif redis.call("exists", namespaced_group_key) == 1 then\n    return 0\nend\nredis.call("sadd", all_groups_key, no_namespaced_group_key)\nredis.call("hset", namespaced_group_key,\n           "__created__", "1")\nreturn 1\n'}


p prepared_scripts['update_capabilities']
(Pdb) p prepared_scripts['update_capabilities']
<redis.client.Script object at 0x7ff8c5dbded0>
(Pdb) p prepared_scripts['update_capabilities'].__dict__
{'sha': 'cccb120b934124a8e613a354221a1636a0ced4bd', 'registered_client': StrictRedis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>, 'script': '\n-- Extract *all* the variables (so we can easily know what they are)...\nlocal group_key = KEYS[1]\nlocal member_id = ARGV[1]\nlocal caps = ARGV[2]\nif redis.call("exists", group_key) == 0 then\n    return -1\nend\nif redis.call("hexists", group_key, member_id) == 0 then\n    return -2\nend\nredis.call("hset", group_key, member_id, caps)\nreturn 1\n'}

       
import string
string模块是对字符串操作的模块
Template类可以格式化字符串
1 其中变量命名是类似: $var,这种
2 可以通过继承string.Template, 重写变量delimiter(定界符)和idpattern(替换格式), 定制不同形式的模板
关于string的用法参考:
https://blog.youkuaiyun.com/github_36601823/article/details/77815013
https://www.jb51.net/article/108179.htm

C) 分析
prepared_scripts[name] = self._client.register_script(script)
作用:注册一个Lua脚本,返回一个可执行的脚本对象

D) 分析
self.heartbeat() 方法
会进入:
/usr/lib/python2.7/site-packages/tooz/drivers/redis.py(507)heartbeat()
代码如下:
    def heartbeat(self):
        """Refresh this member's liveness marker and all held locks.

        Issues ``PSETEX <beat_prefix>:<member_id> <ttl_ms> STILL_ALIVE``
        against Redis so the membership key expires unless beaten again.
        """
        with _translate_failures():
            beat_id = self._encode_beat_id(self._member_id)
            # TTL in milliseconds, clamped to be non-negative.
            expiry_ms = max(0, int(self.membership_timeout * 1000.0))
            self._client.psetex(beat_id, time_ms=expiry_ms,
                                value=self.STILL_ALIVE)
        # Best-effort extension of every lock this member currently holds;
        # a failed lock heartbeat is logged rather than propagated.
        for lock in self._acquired_locks.copy():
            try:
                lock.heartbeat()
            except tooz.ToozError:
                LOG.warning("Unable to heartbeat lock '%s'", lock,
                            exc_info=True)
        # Callers must beat again before the smaller of the two timeouts.
        return min(self.lock_timeout, self.membership_timeout)

分析:
heartbeat(self): 
设置某个组成员的心跳id,默认30秒,会向redis调用PSETEX name ttl_ms value
的形式给该组成员设置一个键为: <beat_prefix>:<member_id>的字符串,
具体键的样例如下: '_tooz_beats:node-3000'
返回超时时间,默认30秒


总结:
RedisDriver的_start方法
作用: 构造redis客户端,然后生成redis的lua模板,包含create_group,delete_group,update_capabilities
  为每个脚本调用register_script生成可执行的脚本对象,最后记录到脚本字典中;
  为当前组成员调用redis的PSETEX name ttl_ms value创建一个默认30秒的过期的键值对。
  
具体过程: 
步骤1: 
调用_make_client(cls, parsed_url, options, default_socket_timeout):
根据解析后的url,选项列表和超时时间初始化redis.StrictRedis客户端
传入的参数实际是:
{'socket_timeout': 30, 'host': 'localhost', 'port': 6379}
步骤2:
用参数字典: {'group_existence_key': u'__created__', 'group_existence_value': u'1'}
去填充redis的lua模板,然后对每个填充后的lua脚本,执行:
StrictRedis.register_script方法注册得到一个可执行的脚本对象
3个脚本内容分别如下:
delete_group脚本对应内容如下:
-- Extract *all* the variables (so we can easily know what they are)...
local namespaced_group_key = KEYS[1]
local all_groups_key = KEYS[2]
local no_namespaced_group_key = ARGV[1]
if redis.call("exists", namespaced_group_key) == 0 then
return -1
end
if redis.call("sismember", all_groups_key, no_namespaced_group_key) == 0 then
return -2
end
if redis.call("hlen", namespaced_group_key) > 1 then
return -3
end
-- First remove from the set (then delete the group); if the set removal
-- fails, at least the group will still exist (and can be fixed manually)...
if redis.call("srem", all_groups_key, no_namespaced_group_key) == 0 then
return -4
end
redis.call("del", namespaced_group_key)
return 1

create_group脚本如下:
-- Extract *all* the variables (so we can easily know what they are)...
local namespaced_group_key = KEYS[1]
local all_groups_key = KEYS[2]
local no_namespaced_group_key = ARGV[1]
if redis.call("exists", namespaced_group_key) == 1 then
return 0
end
redis.call("sadd", all_groups_key, no_namespaced_group_key)
redis.call("hset", namespaced_group_key,
       "__created__", "1")
return 1

update_capabilities脚本如下:
-- Extract *all* the variables (so we can easily know what they are)...
local group_key = KEYS[1]
local member_id = ARGV[1]
local caps = ARGV[2]
if redis.call("exists", group_key) == 0 then
return -1
end
if redis.call("hexists", group_key, member_id) == 0 then
return -2
end
redis.call("hset", group_key, member_id, caps)
return 1


步骤3: 最后将脚本名称和脚本对象加入到脚本字典中,最终得到类似如下字典:
{'delete_group': <redis.client.Script object at 0x7ff8c5dbde50>, 'create_group': <redis.client.Script object at 0x7ff8c5df7690>, 'update_capabilities': <redis.client.Script object at 0x7ff8c5dbded0>}

步骤4:
调用heartbeat(self): 
设置某个组成员的心跳id,默认30秒,会向redis调用PSETEX name ttl_ms value
的形式给该组成员设置一个键为: <beat_prefix>:<member_id>的字符串,
具体键的样例如下: '_tooz_beats:node-3000'
返回超时时间,默认30秒

步骤5: 设置self._started组成员启动标记为True


2.3 继续回到客户端的调用方法
class PartionCoordinator(object):
    PARTION_NAMESPACE = "sky"

    def __init__(self, memberId=None, backendUrl=None):
        self._coordinator = None
        self._groups = set()
        self._memberId = memberId or str(uuid.uuid4().encode('ascii'))
        self._backendUrl = backendUrl
        self._group = None

        self.coordinator = coordination.get_coordinator(
            self._backendUrl, self._memberId
        )
        self.partitionSet = list(range(12))
        self.groupState = None

    def process(self):
        self.coordinator.start(start_heart=True)
        self.hashring = self.coordinator.join_partitioned_group(
            self.PARTION_NAMESPACE
        )

        @periodics.periodic(spacing=3000, run_immediately=True)
        def checkGroupStatus():
            # import pdb;pdb.set_trace()
            self.coordinator.run_watchers()
            if self.groupState != self.hashring.ring.nodes:
                self.groupState = self.hashring.ring.nodes.copy()
                # TODO()
                self.refreshService()

        self.periodic = periodics.PeriodicWorker.create(
            [],
            executor_factory=lambda: futures.ThreadPoolExecutor(max_workers=10))

        self.periodic.add(checkGroupStatus)
        spawnThread(self.periodic.start)

分析:
2.3.1) 
self.coordinator.start(start_heart=True)
该方法已经分析完成,具体就是针对RedisDriver,就构造redis客户端,然后生成redis的lua模板,
包含create_group,delete_group,update_capabilities
为每个脚本调用register_script生成可执行的脚本对象,最后记录到脚本字典中;
为当前组成员调用redis的PSETEX name ttl_ms value创建一个默认30秒的过期的键值对。

2.3.2)
分析
self.hashring = self.coordinator.join_partitioned_group(
    self.PARTION_NAMESPACE
)
具体参见3的分析

3 分析
self.hashring = self.coordinator.join_partitioned_group(self.PARTION_NAMESPACE)
进入文件: tooz/tooz/coordination.py
代码如下:
    def join_partitioned_group(
            self, group_id,
            weight=1,
            partitions=partitioner.Partitioner.DEFAULT_PARTITION_NUMBER):
        """Join a group and get a partitioner.

        A partitioner allows to distribute a bunch of objects across several
        members using a consistent hash ring. Each object gets assigned (at
        least) one member responsible for it. It's then possible to check which
        object is owned by any member of the group.

        This method also creates if necessary, and joins the group with the
        selected weight.

        :param group_id: The group to create a partitioner for.
        :param weight: The weight to use in the hashring for this node.
        :param partitions: The number of partitions to create.
        :return: A :py:class:`~tooz.partitioner.Partitioner` object.

        """
        self.join_group_create(group_id, capabilities={'weight': weight})
        return partitioner.Partitioner(self, group_id, partitions=partitions)

分析:
3.1) 分析
self.join_group_create(group_id, capabilities={'weight': weight})
进入代码:
    @_retry.retry()
    def join_group_create(self, group_id, capabilities=b""):
        """Join a group and create it if necessary.

        If the group cannot be joined because it does not exist, it is created
        before being joined.

        This function will keep retrying until it can create the group and join
        it. Since nothing is transactional, it may have to retry several times
        if another member is creating/deleting the group at the same time.

        :param group_id: Identifier of the group to join and create
        :param capabilities: the capabilities of the joined member
        """
        req = self.join_group(group_id, capabilities)
        try:
            req.get()
        except GroupNotCreated:
            req = self.create_group(group_id)
            try:
                req.get()
            except GroupAlreadyExist:
                # The group might have been created in the meantime, ignore
                pass
            # Now retry to join the group
            raise _retry.TryAgain

先调用
req = self.join_group(group_id, capabilities)
具体参见3.1.1的分析
发生GroupNotCreated异常即组不存在时,则调用:
req = self.create_group(group_id)
具体参见3.1.2的分析

3.1.1) 分析
req = self.join_group(group_id, capabilities)
进入:
tooz/drivers/redis.py的

    def join_group(self, group_id, capabilities=b""):
        encoded_group = self._encode_group_id(group_id)
        encoded_member_id = self._encode_member_id(self._member_id)

        def _join_group(p):
            if not p.exists(encoded_group):
                raise coordination.GroupNotCreated(group_id)
            p.multi()
            p.hset(encoded_group, encoded_member_id,
                   self._dumps(capabilities))
            c = p.execute()[0]
            if c == 0:
                # Field already exists...
                raise coordination.MemberAlreadyExist(group_id,
                                                      self._member_id)
            else:
                self._joined_groups.add(group_id)

        return RedisFutureResult(self._submit(self._client.transaction,
                                              _join_group,
                                              encoded_group,
                                              value_from_callable=True))
分析:
def join_group(self, group_id, capabilities=b""):
通过StrictRedis.transaction方法调用pipeline去执行_join_group(p)方法,
而_join_group(p)方法具体处理逻辑如下:
如果组不存在,则抛出GroupNotCreated异常;否则,调用redis的multi方法开启一个事务,
然后调用redis的hset方法,具体如下:
hset(encoded_group, encoded_member_id, self._dumps(capabilities))
最后调用redis的execute方法执行事务对应的队列中的所有命令,其中hset设置的
hash表的表名是组名,键是成员id,值是成员的权重,形如: '{'weight': 1}'
如果执行成功,则将当前加入的组名记录到已加入组集合_joined_groups中。
总结:
这里创建的组实际上redis中的hash,所有的组是redis中的set,组成员是string的键值对,样例类似如下:
127.0.0.1:6379> HKEYS _tooz_group:sky
1) "__created__"
2) "node-3000"

127.0.0.1:6379> HGET _tooz_group:sky node-3000
"\x81\xc4\x06weight\x01"
127.0.0.1:6379> HGET _tooz_group:sky __created__
"1"

附上其他redis信息:
127.0.0.1:6379> KEYS *
1) "_tooz_groups"
2) "_tooz_beats:node-3000"
3) "_tooz_group:sky"

127.0.0.1:6379> SMEMBERS _tooz_groups
1) "sky"
127.0.0.1:6379> GET _tooz_beats:node-3000
"Not dead!"


3.1.2)
分析组不存在时则调用
req = self.create_group(group_id)
进入:
tooz/drivers/redis.py中的如下代码
def create_group(self, group_id):
script = self._get_script('create_group')

def _create_group(script):
    encoded_group = self._encode_group_id(group_id)
    keys = [
        encoded_group,
        self._groups,
    ]
    args = [
        self._encode_group_id(group_id, apply_namespace=False),
    ]
    result = script(keys=keys, args=args)
    result = strutils.bool_from_string(result)
    if not result:
        raise coordination.GroupAlreadyExist(group_id)

return RedisFutureResult(self._submit(_create_group, script))

分析:
_create_group(script):
tooz中创建group实际是调用lua脚本去做的,具体是:
如果待创建group名称已经在redis中存在,则直接返回;
否则则调用redis的sadd方法将当前待创建group名称加入到group集合中,
然后调用hset方法将键值对<"__created__", "1">写入到group名称的hash表中。
其中lua脚本对象是redis.client.Script对象
脚本如下:
    create_group对应脚本如下:
    
    """
    -- Extract *all* the variables (so we can easily know what they are)...
    local namespaced_group_key = KEYS[1]
    local all_groups_key = KEYS[2]
    local no_namespaced_group_key = ARGV[1]
    if redis.call("exists", namespaced_group_key) == 1 then
        return 0
    end
    redis.call("sadd", all_groups_key, no_namespaced_group_key)
    redis.call("hset", namespaced_group_key,
               "${group_existence_key}", "${group_existence_value}")
    return 1
    """

总结:
join_group_create(self, group_id, capabilities=b""):
作用: 加入到组中,如果组不存在就创建这个组,然后加入到组中
具体过程如下:
步骤1: 首次尝试加入到组中,如果加入组失败,则抛出异常,进入到创建组的代码
步骤2: 创建组时,调用create_group方法,该方法具体如下:
    create_group(self, group_id):
    获取'create_group'对应的lua脚本,然后使用线程池执行器futurist._futures.ThreadPoolExecutor去执行_create_group方法
    _create_group方法中则创建group实际是调用lua脚本去做的,具体是:
    如果待创建group名称已经在redis中存在,则直接返回;
    否则则调用redis的sadd方法将当前待创建group名称加入到group集合中,
    然后调用hset方法将键值对<"__created__", "1">写入到group名称的hash表中。

    其中lua脚本对象是redis.client.Script对象, 脚本内容如下:
    create_group对应脚本如下:

    """
    -- Extract *all* the variables (so we can easily know what they are)...
    local namespaced_group_key = KEYS[1]
    local all_groups_key = KEYS[2]
    local no_namespaced_group_key = ARGV[1]
    if redis.call("exists", namespaced_group_key) == 1 then
        return 0
    end
    redis.call("sadd", all_groups_key, no_namespaced_group_key)
    redis.call("hset", namespaced_group_key,
               "${group_existence_key}", "${group_existence_value}")
    return 1
    """
步骤3: 继续调用join_group(group_id, capabilities)方法将成员加入到组中,具体
    方法如下:
            def join_group(self, group_id, capabilities=b""):
通过StrictRedis.transaction方法调用pipeline去执行_join_group(p)方法,
而_join_group(p)方法具体处理逻辑如下:
如果组不存在,则抛出异常;否则,调用redis的multi方法开启一个事务,
然后调用redis的hset方法,具体如下:
hset(encoded_group, encoded_member_id, self._dumps(capabilities))
最后调用redis的execute方法执行事务对应的队列中的所有命令,其中hset设置的
hash表的表名是组名,键是成员id,值是成员的权重,形如: '{'weight': 1}'
如果执行成功,则将当前加入的组名记录到已加入组集合中。
总结:
这里创建的组实际上redis中的hash,所有的组是redis中的set,组成员是string的键值对,样例类似如下:
127.0.0.1:6379> HKEYS _tooz_group:sky
1) "__created__"
2) "node-3000"

127.0.0.1:6379> HGET _tooz_group:sky node-3000
"\x81\xc4\x06weight\x01"
127.0.0.1:6379> HGET _tooz_group:sky __created__
"1"

附上其他redis信息:
127.0.0.1:6379> KEYS *
1) "_tooz_groups"
2) "_tooz_beats:node-3000"
3) "_tooz_group:sky"

127.0.0.1:6379> SMEMBERS _tooz_groups
1) "sky"
127.0.0.1:6379> GET _tooz_beats:node-3000
"Not dead!"
总结: 
join_group_create(self, group_id, capabilities=b"")
方法会先尝试将当前组成员加入到组中,如果失败,则调用
redis的lua脚本的sadd将当前组加入到组集合中并创建组,
然后调用redis的hset方法将键值对<"__created__", "1">写入到group名称的hash表中。
最后在进行组成员加入组的操作中通过redis的hset方法将<组成员, 组成员的权重信息>加入到组所在的hash表中,


3.2) 分析
partitioner.Partitioner(self, group_id, partitions=partitions)
进入:
tooz/tooz/partitioner.py对应代码如下:

class Partitioner(object):
    """Partition set of objects across several members.

    Objects to be partitioned should implement the __tooz_hash__ method to
    identify themselves across the consistent hashring. This should method
    return bytes.

    """

    DEFAULT_PARTITION_NUMBER = hashring.HashRing.DEFAULT_PARTITION_NUMBER

    def __init__(self, coordinator, group_id,
                 partitions=DEFAULT_PARTITION_NUMBER):
        members = coordinator.get_members(group_id)
        self.partitions = partitions
        self.group_id = group_id
        self._coord = coordinator
        caps = [(m, self._coord.get_member_capabilities(self.group_id, m))
                for m in members.get()]
        self._coord.watch_join_group(self.group_id, self._on_member_join)
        self._coord.watch_leave_group(self.group_id, self._on_member_leave)
        self.ring = hashring.HashRing([], partitions=self.partitions)
        for m_id, cap in caps:
            self.ring.add_node(m_id, cap.get().get("weight", 1))

    def _on_member_join(self, event):
        weight = self._coord.get_member_capabilities(
            self.group_id, event.member_id).get().get("weight", 1)
        self.ring.add_node(event.member_id, weight)

    def _on_member_leave(self, event):
        self.ring.remove_node(event.member_id)

分析:
3.2.1) members = coordinator.get_members(group_id)
进入:
/usr/lib/python2.7/site-packages/tooz/drivers/redis.py(613)get_members()
def get_members(self, group_id):
使用线程池执行器futurist._futures.ThreadPoolExecutor去通过StrictRedis._client.transaction
执行_get_members方法,
_get_members方法通过redis的StrictPipeline调用HKEYS方法来获取形如 '_tooz_group:sky' 的组名
对应的哈希表中所有信息,过滤掉__created__,得到候选members,最后将候选members中每个成员名和心跳前缀_tooz_beats
组成的类似_tooz_beats:node-3000,检查所有除了当前组成员其他没有心跳的成员,加入到消失成员集合中,
然后调用redis的hdel命令删除组名(例如:_tooz_group:sky)中所有没有心跳的成员

(Pdb) p members
<tooz.coordination.CoordinatorResult object at 0x7fefcafc97d0>
(Pdb) p type(members)
<class 'tooz.coordination.CoordinatorResult'>
(Pdb) p members.__dict__
{'_fut': <Future at 0x7fefcafc9250 state=finished returned set>, '_failure_translator': <function _translate_failures at 0x7fefcb01bd70>}

(Pdb) p members._fut
<Future at 0x7fefcafc9250 state=finished returned set>
(Pdb) p type(members._fut)
<class 'concurrent.futures._base.Future'>
(Pdb) p members._fut.__dict__
{'_exception': None, '_result': set(['node-3000']), '_condition': <Condition(<_RLock owner=None count=0>, 0)>, '_state': 'FINISHED', '_traceback': None, '_waiters': [], '_done_callbacks': [<functools.partial object at 0x7fefcafc4cb0>]}


3.2.2) 分析
caps = [(m, self._coord.get_member_capabilities(self.group_id, m))
        for m in members.get()]
分析:
进入:
/usr/lib/python2.7/site-packages/tooz/drivers/redis.py(657)get_member_capabilities()
(Pdb) p caps
[('node-3000', <tooz.coordination.CoordinatorResult object at 0x7fefc9f6ae10>)]

get_member_capabilities(self, group_id, member_id):
读取redis组名对应的哈希表中该成员的权重值字典并返回

3.2.3) 分析
self.ring = hashring.HashRing([], partitions=self.partitions)
for m_id, cap in caps:
    self.ring.add_node(m_id, cap.get().get("weight", 1))

3.2.3.1) 入参分析
(Pdb) p m_id
'node-3000'
(Pdb) p cap
<tooz.coordination.CoordinatorResult object at 0x7fefc9f6ae10>
(Pdb) p type(cap)
<class 'tooz.coordination.CoordinatorResult'>

随后调用HashRing的add_node方法,具体参见4的分析

先提前总结:
__init__(self, coordinator, group_id, partitions=DEFAULT_PARTITION_NUMBER):
给协调器后端设置组成员离开和组成员加入的回调函数。
随之更新群组的一致性哈希,成员加入添加虚拟节点,
成员离开回收虚拟节点。
获取组成员集合,对每个组成员,去redis中获取该组成员的权重信息。
遍历每个组成员的名称,调用hashlib.md5(节点名称)来获取节点的md5值记录为key_hash
对每个组成员默认进行32*weight次循环,
每次循环处理过程如下:
对key_hash调用hashlib的md5对象的update方法,得到新的md5值,
然后将16进制的md5值转换为10进制整数,然后更新一致性哈希字典ring,
更新为: ring[md5的十进制值]=组成员名称
记录组成员的权重: nodes[组成员名称] = 组成员的权重
最后所有组成员处理完成后,设置划分数组为
一致性哈希字典ring的所有键排序后的结果,用于后续二分查找确定要处理的东西靠近哪个
md5值,然后根据该md5值找到对应的组成员,将请求分发给该组成员处理

4 分析HashRing
进入tooz/hashring.py
代码如下:
class HashRing(object):
    """Map objects onto nodes based on their consistent hash."""

    DEFAULT_PARTITION_NUMBER = 2**5

    def __init__(self, nodes, partitions=DEFAULT_PARTITION_NUMBER):
        """Create a new hashring.

        :param nodes: List of nodes where objects will be mapped onto.
        :param partitions: Number of partitions to spread objects onto.
        """
        self.nodes = {}
        self._ring = dict()
        self._partitions = []
        self._partition_number = partitions

        self.add_nodes(set(nodes))

分析:
4.1) 入参分析
(Pdb) p nodes
[]
DEFAULT_PARTITION_NUMBER默认为32

4.2)分析add_node方法
    def add_node(self, node, weight=1):
        """Add a node to the hashring.

        :param node: Node to add.
        :param weight: How many resource instances this node should manage
        compared to the other nodes (default 1). Higher weights will be
        assigned more resources. Three nodes A, B and C with weights 1, 2 and 3
        will each handle 1/6, 1/3 and 1/2 of the resources, respectively.

        """
        return self.add_nodes((node,), weight)


4.2.1)入参分析
(Pdb) p node
'node-3000'
(Pdb) p weight
1

4.2.2) 分析
调用的add_nodes方法,具体如下:
    def add_nodes(self, nodes, weight=1):
        """Add nodes to the hashring with equal weight

        :param nodes: Nodes to add.
        :param weight: How many resource instances this node should manage
        compared to the other nodes (default 1). Higher weights will be
        assigned more resources. Three nodes A, B and C with weights 1, 2 and 3
        will each handle 1/6, 1/3 and 1/2 of the resources, respectively.
        """
        for node in nodes:
            key = utils.to_binary(node, 'utf-8')
            key_hash = hashlib.md5(key)
            for r in six.moves.range(self._partition_number * weight):
                key_hash.update(key)
                self._ring[self._hash2int(key_hash)] = node

            self.nodes[node] = weight

        self._partitions = sorted(self._ring.keys())

4.2.2.1)变量分析
(Pdb) p nodes
('node-3000',)
(Pdb) p weight
1

(Pdb) p node
'node-3000'
(Pdb) p key
'node-3000'
(Pdb) p key_hash
<md5 HASH object @ 0x7fefc9f93260>
(Pdb) p type(key_hash)
<type '_hashlib.HASH'>

(Pdb) p key
'node-3000'
弄清楚:
hashlib.md5.update
md5.update  会将每次字符串拼接
ref:
https://blog.51cto.com/1inux/2108959

第一次执行结果:
(Pdb) p key_hash
<md5 HASH object @ 0x7fefc9f93260>

第二次:
(Pdb) p key_hash
<md5 HASH object @ 0x7fefc9f93260>
(Pdb) p key_hash.hexdigest()
'8866d8daf55b46577311398542bf0dd8'

第三次:
(Pdb) p key_hash
<md5 HASH object @ 0x7fefc9f93260>
(Pdb) p key_hash.hexdigest()
'3f8e11dde913fc15330b4841614a8501'

_hash2int(key): 表示将16进制的md5结果转换为10进制整数
输入结果样例:
<md5 HASH object @ 0x7fefc9f93260>
返回结果样例:
94349557664644695588006514081887294582L

keys方法返回字典的所有键,这里就是哈希值
(Pdb) p self._partitions
[9101826525951613175432792623211427747L, 30028420957080968027827032720881255701L, 47713832108673592235538076698980016894L, 49484894622123764194334714959844419953L, 58805507990936298890254435360855056347L, 68914727356551315131597485578395823994L, 71557230046726731486002917189518880570L, 77590700806915862040818251517005689698L, 84479032270882990553366491184904176897L, 94349557664644695588006514081887294582L, 105602429555454214449946553903447671293L, 118994027915277724575563950107646851594L, 135591386193152813536427140891598846843L, 140397949185240025766067737642550048262L, 154623871365156411212462004141588147043L, 155045498511259226289048299902558099521L, 177143011800303654931694027156292653088L, 181309020054467099838091518836410879448L, 182663186055434518693503281666208020212L, 193175624716717498167140474673273452993L, 203778790255403559105921860674899523176L, 211536901380496309068745126249059634924L, 225935520899190683125057350910388571266L, 237623502696777410001416097868724943024L, 250797869699005336385109425749852200044L, 270699814193575314616715386903209222405L, 275107054663646284801395915873859020241L, 277835064230897229109972682695314758479L, 288712363595930393887665554325588759041L, 302553405768077176679658570298670839551L, 321213416142659019686880095774970912915L, 340148343400952977489419833970623500155L]

之所以排序,是用于后面二分查找

4.2.2.2)
总结:
add_nodes(self, nodes, weight=1):
遍历每个组成员的名称,调用hashlib.md5(节点名称)来获取节点的md5值记录为key_hash
对每个组成员默认进行32*weight次循环,
每次循环处理过程如下:
对key_hash调用hashlib的md5对象的update方法,得到新的md5值,
然后将16进制的md5值转换为10进制整数,然后更新一致性哈希字典ring,
更新为: ring[md5的十进制值]=组成员名称
记录组成员的权重: nodes[组成员名称] = 组成员的权重
最后所有组成员处理完成后,设置划分数组为
一致性哈希字典ring的所有键排序后的结果,用于后续二分查找确定要处理的东西靠近哪个
md5值,然后根据该md5值找到对应的组成员,将请求分发给该组成员处理


5 回到
    def join_partitioned_group(
            self, group_id,
            weight=1,
            partitions=partitioner.Partitioner.DEFAULT_PARTITION_NUMBER):
        self.join_group_create(group_id, capabilities={'weight': weight})
        return partitioner.Partitioner(self, group_id, partitions=partitions)
总结:
join_partitioned_group:
加入到群组中,如果组没有创建,则创建这个组,返回分区器,分区器中包含了该组对应的一致性哈希。
具体过程如下:
join_group_create(self, group_id, capabilities=b""):
具体过程如下:
步骤1: 首次尝试加入到组中,如果加入组失败,则抛出异常,进入到创建组的代码
步骤2: 创建组时,调用create_group方法,该方法具体如下:
create_group(self, group_id):
获取'create_group'对应的lua脚本,然后使用线程池执行器futurist._futures.ThreadPoolExecutor去执行_create_group方法
_create_group方法中则创建group实际是调用lua脚本去做的,具体是:
如果待创建group名称已经在redis中存在,则直接返回;
否则则调用redis的sadd方法将当前待创建group名称加入到group集合中,
然后调用hset方法将键值对<"__created__", "1">写入到group名称的hash表中。

其中lua脚本对象是redis.client.Script对象, 脚本内容如下:
create_group对应脚本如下:

"""
-- Extract *all* the variables (so we can easily know what they are)...
local namespaced_group_key = KEYS[1]
local all_groups_key = KEYS[2]
local no_namespaced_group_key = ARGV[1]
if redis.call("exists", namespaced_group_key) == 1 then
    return 0
end
redis.call("sadd", all_groups_key, no_namespaced_group_key)
redis.call("hset", namespaced_group_key,
           "${group_existence_key}", "${group_existence_value}")
return 1
"""
步骤3: 继续调用join_group(group_id, capabilities)方法将成员加入到组中,具体
方法如下:
        def join_group(self, group_id, capabilities=b""):
通过StrictRedis.transaction方法调用pipeline去执行_join_group(p)方法,
而_join_group(p)方法具体处理逻辑如下:
如果组不存在,则抛出异常;否则,调用redis的multi方法开启一个事务,
然后调用redis的hset方法,具体如下:
hset(encoded_group, encoded_member_id, self._dumps(capabilities))
最后调用redis的execute方法执行事务对应的队列中的所有命令,其中hset设置的
hash表的表名是组名,键是成员id,值是成员的权重,形如: '{'weight': 1}'
如果执行成功,则将当前加入的组名记录到已加入组集合中。
总结:
这里创建的组实际上redis中的hash,所有的组是redis中的set,组成员是string的键值对,样例类似如下:
127.0.0.1:6379> HKEYS _tooz_group:sky
1) "__created__"
2) "node-3000"

127.0.0.1:6379> HGET _tooz_group:sky node-3000
"\x81\xc4\x06weight\x01"
127.0.0.1:6379> HGET _tooz_group:sky __created__
"1"

附上其他redis信息:
127.0.0.1:6379> KEYS *
1) "_tooz_groups"
2) "_tooz_beats:node-3000"
3) "_tooz_group:sky"

127.0.0.1:6379> SMEMBERS _tooz_groups
1) "sky"
127.0.0.1:6379> GET _tooz_beats:node-3000
"Not dead!"

步骤4: 实例化分区器,传入组的名称
给协调器后端设置组成员离开和组成员加入的回调函数。
随之更新群组的一致性哈希,成员加入添加虚拟节点,
成员离开回收虚拟节点。
获取组成员集合,对每个组成员,去redis中获取该组成员的权重信息。
遍历每个组成员的名称,调用hashlib.md5(节点名称)来获取节点的md5值记录为key_hash
对每个组成员默认进行32*weight次循环,
每次循环处理过程如下:
对key_hash调用hashlib的md5对象的update方法,得到新的md5值,
然后将16进制的md5值转换为10进制整数,然后更新一致性哈希字典ring,
更新为: ring[md5的十进制值]=组成员名称
记录组成员的权重: nodes[组成员名称] = 组成员的权重
最后所有组成员处理完成后,设置划分数组为
一致性哈希字典ring的所有键排序后的结果,用于后续二分查找确定要处理的东西靠近哪个
md5值,然后根据该md5值找到对应的组成员,将请求分发给该组成员处理。

6 继续回到调用端的代码
具体如下:
    def process(self):
        self.coordinator.start(start_heart=True)
        # import pdb;pdb.set_trace()
        self.hashring = self.coordinator.join_partitioned_group(
            self.PARTION_NAMESPACE
        )

        @periodics.periodic(spacing=3000, run_immediately=True)
        def checkGroupStatus():
            self.coordinator.run_watchers()
            if self.groupState != self.hashring.ring.nodes:
                self.groupState = self.hashring.ring.nodes.copy()
                # TODO()
                self.refreshService()

        self.periodic = periodics.PeriodicWorker.create(
            [],
            executor_factory=lambda: futures.ThreadPoolExecutor(max_workers=10))

        self.periodic.add(checkGroupStatus)
        spawnThread(self.periodic.start)

分析:
6.1) 
刚才已经分析了
        self.hashring = self.coordinator.join_partitioned_group(
            self.PARTION_NAMESPACE
        )
方法,具体就是:
加入到群组中,如果组没有创建,则创建这个组,返回分区器,分区器中包含了该组对应的一致性哈希。


返回结果样例如下:
(Pdb) p self.hashring
<tooz.partitioner.Partitioner object at 0x7fd5a162c7d0>
(Pdb) p type(self.hashring)
<class 'tooz.partitioner.Partitioner'>
(Pdb) p self.hashring.__dict__
{'ring': <tooz.hashring.HashRing object at 0x7fd5a1631550>, 'group_id': 'sky', '_coord': <tooz.drivers.redis.RedisDriver object at 0x7fd5a169bcd0>, 'partitions': 32}

(Pdb) p self.hashring.ring
<tooz.hashring.HashRing object at 0x7fd5a1631550>
(Pdb) p type(self.hashring.ring)
<class 'tooz.hashring.HashRing'>
(Pdb) p self.hashring.ring.nodes
{'node-3000': 1}
(Pdb) p self.hashring.ring.__dict__
{'_partitions': [9101826525951613175432792623211427747L, 30028420957080968027827032720881255701L, 47713832108673592235538076698980016894L, 49484894622123764194334714959844419953L, 58805507990936298890254435360855056347L, 68914727356551315131597485578395823994L, 71557230046726731486002917189518880570L, 77590700806915862040818251517005689698L, 84479032270882990553366491184904176897L, 94349557664644695588006514081887294582L, 105602429555454214449946553903447671293L, 118994027915277724575563950107646851594L, 135591386193152813536427140891598846843L, 140397949185240025766067737642550048262L, 154623871365156411212462004141588147043L, 155045498511259226289048299902558099521L, 177143011800303654931694027156292653088L, 181309020054467099838091518836410879448L, 182663186055434518693503281666208020212L, 193175624716717498167140474673273452993L, 203778790255403559105921860674899523176L, 211536901380496309068745126249059634924L, 225935520899190683125057350910388571266L, 237623502696777410001416097868724943024L, 250797869699005336385109425749852200044L, 270699814193575314616715386903209222405L, 275107054663646284801395915873859020241L, 277835064230897229109972682695314758479L, 288712363595930393887665554325588759041L, 302553405768077176679658570298670839551L, 321213416142659019686880095774970912915L, 340148343400952977489419833970623500155L], 'nodes': {'node-3000': 1}, '_ring': {250797869699005336385109425749852200044L: 'node-3000', 140397949185240025766067737642550048262L: 'node-3000', 340148343400952977489419833970623500155L: 'node-3000', 135591386193152813536427140891598846843L: 'node-3000', 84479032270882990553366491184904176897L: 'node-3000', 211536901380496309068745126249059634924L: 'node-3000', 193175624716717498167140474673273452993L: 'node-3000', 203778790255403559105921860674899523176L: 'node-3000', 94349557664644695588006514081887294582L: 'node-3000', 58805507990936298890254435360855056347L: 'node-3000', 49484894622123764194334714959844419953L: 'node-3000', 
47713832108673592235538076698980016894L: 'node-3000', 277835064230897229109972682695314758479L: 'node-3000', 288712363595930393887665554325588759041L: 'node-3000', 181309020054467099838091518836410879448L: 'node-3000', 105602429555454214449946553903447671293L: 'node-3000', 177143011800303654931694027156292653088L: 'node-3000', 30028420957080968027827032720881255701L: 'node-3000', 321213416142659019686880095774970912915L: 'node-3000', 275107054663646284801395915873859020241L: 'node-3000', 155045498511259226289048299902558099521L: 'node-3000', 9101826525951613175432792623211427747L: 'node-3000', 237623502696777410001416097868724943024L: 'node-3000', 225935520899190683125057350910388571266L: 'node-3000', 302553405768077176679658570298670839551L: 'node-3000', 118994027915277724575563950107646851594L: 'node-3000', 154623871365156411212462004141588147043L: 'node-3000', 77590700806915862040818251517005689698L: 'node-3000', 182663186055434518693503281666208020212L: 'node-3000', 270699814193575314616715386903209222405L: 'node-3000', 71557230046726731486002917189518880570L: 'node-3000', 68914727356551315131597485578395823994L: 'node-3000'}, '_partition_number': 32}


6.2)分析
        @periodics.periodic(spacing=1, run_immediately=True)
        def checkGroupStatus():
            self.coordinator.run_watchers()
            if self.groupState != self.hashring.ring.nodes:
                self.groupState = self.hashring.ring.nodes.copy()
                # TODO()
                self.refreshService()

分析:
6.2.1) 
这里是启动一个定时任务,每隔1秒调用协调器的run_watchers方法,
具体进入:
tooz/drivers/redis.py的
    def run_watchers(self, timeout=None):
        result = super(RedisDriver, self).run_watchers(timeout=timeout)
        self.run_elect_coordinator()
        return result

然后调用其父类的:
tooz/coordination.py的
    def run_watchers(self, timeout=None):
        with timeutils.StopWatch(duration=timeout) as w:
            result = []
            group_with_hooks = set(self._hooks_join_group.keys()).union(
                set(self._hooks_leave_group.keys()))
            for group_id in group_with_hooks:
                try:
                    group_members = self.get_members(group_id).get(
                        timeout=w.leftover(return_none=True))
                except GroupNotCreated:
                    group_members = set()
                if (group_id in self._joined_groups and
                        self._member_id not in group_members):
                    self._joined_groups.discard(group_id)
                old_group_members = self._group_members.get(group_id, set())
                for member_id in (group_members - old_group_members):
                    result.extend(
                        self._hooks_join_group[group_id].run(
                            MemberJoinedGroup(group_id, member_id)))
                for member_id in (old_group_members - group_members):
                    result.extend(
                        self._hooks_leave_group[group_id].run(
                            MemberLeftGroup(group_id, member_id)))
                self._group_members[group_id] = group_members
            return result

分析:
run_watchers(self, timeout=None):
获取所有_hooks_join_group(组名到组加入时执行函数的字典),
_hooks_leave_group(组名到组离开时执行函数的字典),
中所有的组名,对每个组名获取其组成员集合,如果发现当前tooz driver
保存的成员id不在该组的成员集合中,就从加入的组集合中删除当前组名;对于存在的组员,则加入到组中;对于不存在的组员,则从组中移除。

而self.run_elect_coordinator()
则在群组管理的代码中没有实际执行内容。


7 总结
tooz中可以使用Redis做为driver实现了群组管理,负载均衡,分布式锁等功能,
可以在python项目中通过tooz来构建真正可以水平扩展的服务,在面对
大规模部署环境下,基于tooz的负载均衡 + 分布式锁的python应用
在生产环境应该可以发挥出一定优势。

参考:
tooz 1.57.4版本
redis官方文档
 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值