---------------------------------------------------------------------------
TokenizationError Traceback (most recent call last)
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:210, in normalize_object(o)
209 try:
--> 210 return _normalize_pickle(o)
211 except Exception:
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:257, in _normalize_pickle(o)
256 if pik is None:
--> 257 _maybe_raise_nondeterministic("Failed to tokenize deterministically")
258 pik = int(uuid.uuid4())
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:89, in _maybe_raise_nondeterministic(msg)
88 if val or val is None and config.get("tokenize.ensure-deterministic"):
---> 89 raise TokenizationError(msg)
TokenizationError: Failed to tokenize deterministically
During handling of the above exception, another exception occurred:
TokenizationError Traceback (most recent call last)
Cell In[89], line 6
3 start_time = time.time()
5 # Simulate running the code
----> 6 hold_data()
8 # Get the program end time
9 end_time = time.time()
Cell In[61], line 48, in hold_data()
46 dddf = process_partition(df,df.columns,nul_columns, param.get('nullValues'))
47 # dddf = calculate_non_null_ratio(df,df.columns)
---> 48 print(dddf.compute())
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/base.py:373, in DaskMethodsMixin.compute(self, **kwargs)
349 def compute(self, **kwargs):
350 """Compute this dask collection
351
352 This turns a lazy Dask collection into its in-memory equivalent.
(...) 371 dask.compute
372 """
--> 373 (result,) = compute(self, traverse=False, **kwargs)
374 return result
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/base.py:678, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
659 expr = FinalizeCompute(expr)
661 with shorten_traceback():
662 # The high level optimize will have to be called client side (for now)
663 # The optimize can internally trigger already a computation
(...) 675 # change the graph submission to a handshake which introduces all sorts
676 # of concurrency control issues)
--> 678 expr = expr.optimize()
679 keys = list(flatten(expr.__dask_keys__()))
681 results = schedule(expr, keys, **kwargs)
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:437, in Expr.optimize(self, fuse)
434 def optimize(self, fuse: bool = False) -> Expr:
435 stage: OptimizerStage = "fused" if fuse else "simplified-physical"
--> 437 return optimize_until(self, stage)
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:931, in optimize_until(expr, stage)
928 return result
930 # Simplify
--> 931 expr = result.simplify()
932 if stage == "simplified-logical":
933 return expr
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:447, in Expr.simplify(self)
445 while True:
446 dependents = collect_dependents(expr)
--> 447 new = expr.simplify_once(dependents=dependents, simplified={})
448 if new._name == expr._name:
449 break
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:417, in Expr.simplify_once(self, dependents, simplified)
414 if isinstance(operand, Expr):
415 # Bandaid for now, waiting for Singleton
416 dependents[operand._name].append(weakref.ref(expr))
--> 417 new = operand.simplify_once(
418 dependents=dependents, simplified=simplified
419 )
420 simplified[operand._name] = new
421 if new._name != operand._name:
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:417, in Expr.simplify_once(self, dependents, simplified)
414 if isinstance(operand, Expr):
415 # Bandaid for now, waiting for Singleton
416 dependents[operand._name].append(weakref.ref(expr))
--> 417 new = operand.simplify_once(
418 dependents=dependents, simplified=simplified
419 )
420 simplified[operand._name] = new
421 if new._name != operand._name:
[... skipping similar frames: Expr.simplify_once at line 417 (2943 times)]
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:417, in Expr.simplify_once(self, dependents, simplified)
414 if isinstance(operand, Expr):
415 # Bandaid for now, waiting for Singleton
416 dependents[operand._name].append(weakref.ref(expr))
--> 417 new = operand.simplify_once(
418 dependents=dependents, simplified=simplified
419 )
420 simplified[operand._name] = new
421 if new._name != operand._name:
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:390, in Expr.simplify_once(self, dependents, simplified)
387 expr = self
389 while True:
--> 390 out = expr._simplify_down()
391 if out is None:
392 out = expr
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/dataframe/dask_expr/_expr.py:1977, in Assign._simplify_down(self)
1974 if self._check_for_previously_created_column(self.frame):
1975 # don't squash if we are using a column that was previously created
1976 return
-> 1977 return Assign(*self._remove_common_columns(self.frame))
1978 elif isinstance(self.frame, Projection) and isinstance(
1979 self.frame.frame, Assign
1980 ):
1981 if self._check_for_previously_created_column(self.frame.frame):
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:875, in SingletonExpr.__new__(cls, _determ_token, *args, **kwargs)
873 if not hasattr(cls, "_instances"):
874 cls._instances = weakref.WeakValueDictionary()
--> 875 inst = super().__new__(cls, *args, _determ_token=_determ_token, **kwargs)
876 _name = inst._name
877 if _name in cls._instances and cls.__init__ == object.__init__:
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:72, in Expr.__new__(cls, _determ_token, *args, **kwargs)
69 inst.operands = [_unpack_collections(o) for o in operands]
70 # This is typically cached. Make sure the cache is populated by calling
71 # it once
---> 72 inst._name
73 return inst
File ~/.conda/envs/py311/lib/python3.11/functools.py:1001, in cached_property.__get__(self, instance, owner)
999 val = cache.get(self.attrname, _NOT_FOUND)
1000 if val is _NOT_FOUND:
-> 1001 val = self.func(instance)
1002 try:
1003 cache[self.attrname] = val
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/dataframe/dask_expr/_expr.py:604, in Blockwise._name(self)
602 else:
603 head = funcname(type(self)).lower()
--> 604 return head + "-" + self.deterministic_token
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:538, in Expr.deterministic_token(self)
533 @property
534 def deterministic_token(self):
535 if not self._determ_token:
536 # Just tokenize self to fall back on __dask_tokenize__
537 # Note how this differs to the implementation of __dask_tokenize__
--> 538 self._determ_token = self.__dask_tokenize__()
539 return self._determ_token
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/_expr.py:147, in Expr.__dask_tokenize__(self)
141 def __dask_tokenize__(self):
142 if not self._determ_token:
143 # If the subclass does not implement a __dask_tokenize__ we'll want
144 # to tokenize all operands.
145 # Note how this differs to the implementation of
146 # Expr.deterministic_token
--> 147 self._determ_token = _tokenize_deterministic(type(self), *self.operands)
148 return self._determ_token
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:457, in _tokenize_deterministic(*args, **kwargs)
455 def _tokenize_deterministic(*args, **kwargs) -> str:
456 # Utility to be strict about deterministic tokens
--> 457 return tokenize(*args, ensure_deterministic=True, **kwargs)
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:76, in tokenize(ensure_deterministic, *args, **kwargs)
74 token = _ENSURE_DETERMINISTIC.set(ensure_deterministic)
75 try:
---> 76 return _tokenize(*args, **kwargs)
77 finally:
78 if token:
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:34, in _tokenize(*args, **kwargs)
33 def _tokenize(*args: object, **kwargs: object) -> str:
---> 34 token: object = _normalize_seq_func(args)
35 if kwargs:
36 token = token, _normalize_seq_func(sorted(kwargs.items()))
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:154, in _normalize_seq_func(seq)
152 _SEEN[id(seq)] = len(_SEEN), seq
153 try:
--> 154 return tuple(map(_inner_normalize_token, seq))
155 finally:
156 del _SEEN[id(seq)]
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:147, in _normalize_seq_func.<locals>._inner_normalize_token(item)
145 if isinstance(item, _IDENTITY_DISPATCH):
146 return item
--> 147 return normalize_token(item)
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:212, in normalize_object(o)
210 return _normalize_pickle(o)
211 except Exception:
--> 212 _maybe_raise_nondeterministic(
213 f"Object {o!r} cannot be deterministically hashed. This likely "
214 "indicates that the object cannot be serialized deterministically."
215 )
216 return uuid.uuid4().hex
File ~/.conda/envs/py311/lib/python3.11/site-packages/dask/tokenize.py:89, in _maybe_raise_nondeterministic(msg)
87 val = None
88 if val or val is None and config.get("tokenize.ensure-deterministic"):
---> 89 raise TokenizationError(msg)
TokenizationError: Object <class 'dask.dataframe.dask_expr._expr.Assign'> cannot be deterministically hashed. This likely indicates that the object cannot be serialized deterministically.
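
Why this happens, and a possible fix (the traceback alone does not show the offending object): dask-expr computes a deterministic token for every operand of an expression such as Assign, and the chained TokenizationError suggests that one of the values passed into process_partition (for example whatever param.get('nullValues') returns) cannot be pickled reproducibly, or cannot be pickled at all. The minimal sketch below reproduces that failure mode and one fix; NullValueConfig and flag_nulls are hypothetical stand-ins, not names from the original code.

    import threading

    import pandas as pd
    import dask.dataframe as dd
    from dask.base import normalize_token


    class NullValueConfig:
        # Hypothetical stand-in for the object returned by param.get('nullValues').
        # The lock makes it unserializable by both pickle and cloudpickle, which
        # is one common way an expression operand fails to tokenize deterministically.
        def __init__(self, values):
            self.values = tuple(values)
            self._lock = threading.Lock()


    # Teach Dask how to hash this type deterministically, so expression names
    # such as "assign-<token>" stay stable and optimize() does not raise.
    @normalize_token.register(NullValueConfig)
    def _tokenize_null_value_config(cfg):
        return ("NullValueConfig", cfg.values)


    def flag_nulls(pdf, cfg):
        # Runs on each pandas partition; flags values treated as null.
        return pdf.assign(is_null=pdf["a"].isin(cfg.values) | pdf["a"].isna())


    ddf = dd.from_pandas(pd.DataFrame({"a": ["x", "NA", None]}), npartitions=1)
    cfg = NullValueConfig(["", "NA", "null"])

    # cfg becomes an operand of the generated expression, so it must tokenize
    # deterministically; without the normalize_token registration above, this
    # raises a TokenizationError like the one in the traceback.
    print(ddf.map_partitions(flag_nulls, cfg).compute())

Note that relaxing the tokenize.ensure-deterministic config setting would not help here: the traceback shows _tokenize_deterministic calling tokenize(..., ensure_deterministic=True) explicitly, so expression tokenization is always strict. The practical options are to pass only plainly picklable values (strings, numbers, tuples, lists) into the partition function, or to register a normalize_token handler for the custom type as sketched above.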