Python/sorts/

This article works through the implementation and use of several sorting algorithms in Python, including bubble sort, bucket sort, cocktail shaker sort, counting sort, and heap sort. Concrete code examples are used to examine how each algorithm works and what its performance characteristics are.

Python/sorts/cocktail_shaker_sort.py

Python/sorts/counting_sort.py

assert "e" == "e"
assert "e" == "a"
Traceback (most recent call last):
  File "<pyshell#88>", line 1, in <module>
    assert "e" == "a"
AssertionError

In Python, the built-in ord() function converts a character to its integer code point, and chr() converts an integer back to the corresponding character.

string = "thisisthestring"
[ord(c) for c in string]
[116, 104, 105, 115, 105, 115, 116, 104, 101, 115, 116, 114, 105, 110, 103]
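
chr() inverts ord(); a minimal round trip over the same string confirms it:

codes = [ord(c) for c in "thisisthestring"]
"".join(chr(n) for n in codes)
'thisisthestring'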

# creating lists of a fixed size

[0]*10
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

[0]*18
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

[1]*10
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

counting_arr = [0] * 16   # one slot per code point, from 101 ('e') up to 116 ('t')
counting_arr
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

counting_arr[116 - 101] += 1  # count one 't' (ord('t') = 116)
counting_arr
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

counting_arr[104 - 101] += 1  # count one 'h'
counting_arr
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

counting_arr[105 - 101] += 1  # count one 'i'
counting_arr
[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

counting_arr[115 - 101] += 1  # count one 's'
counting_arr
[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

counting_arr[3] += 1  # incrementing the same slot (index 3, i.e. 'h') again
counting_arr
[0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

counting_arr[3] += 1
counting_arr
[0, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

a = 0
a += 1
a
1

a += 1
a
2
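
Putting the manual steps above together, here is a minimal counting-sort sketch for a string. It is an illustrative version, not necessarily the exact code in counting_sort.py:

def counting_sort_string(string):
    lo, hi = min(ord(c) for c in string), max(ord(c) for c in string)
    counting_arr = [0] * (hi - lo + 1)    # same [0]*n construction as above
    for c in string:
        counting_arr[ord(c) - lo] += 1    # same increment as the manual steps
    # rebuild the output from the counts, smallest code point first
    return "".join(chr(lo + i) * n for i, n in enumerate(counting_arr))

counting_sort_string("thisisthestring")
'eghhiiinrsssttt'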

Python/sorts/cycle_sort.py

string
'thisisthestring'

string[:-1]
'thisisthestrin'
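
For reference, a compact cycle-sort sketch: cycle sort rotates every element directly into its final position, doing at most one write per placement. This is an illustrative version, not necessarily the exact code in cycle_sort.py:

def cycle_sort(arr):
    for cycle_start in range(len(arr) - 1):
        item = arr[cycle_start]
        # final position of item = cycle_start + (number of smaller elements to its right)
        pos = cycle_start
        for i in range(cycle_start + 1, len(arr)):
            if arr[i] < item:
                pos += 1
        if pos == cycle_start:           # already in place
            continue
        while item == arr[pos]:          # skip over duplicates
            pos += 1
        arr[pos], item = item, arr[pos]
        while pos != cycle_start:        # keep rotating the rest of the cycle
            pos = cycle_start
            for i in range(cycle_start + 1, len(arr)):
                if arr[i] < item:
                    pos += 1
            while item == arr[pos]:
                pos += 1
            arr[pos], item = item, arr[pos]
    return arr

cycle_sort([4, 3, 2, 1])
[1, 2, 3, 4]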

Python/sorts/heap_sort.py
## nested functions (a function defined inside another function)

unsorted
[4.0, -1.0, -8.0, 7.0]

collection = unsorted
def merge_sort(collection):
    def merge(left, right):  # defined inside merge_sort so the recursion below can call it
        result = []
        while left and right:
            result.append((left if left[0] <= right[0] else right).pop(0))
        return result + left + right
    if len(collection) <= 1:
        return collection
    mid = len(collection) // 2
    return merge(merge_sort(collection[:mid]), merge_sort(collection[mid:]))  # merge is called here

s=merge_sort(collection)
s
[-8.0, -1.0, 4.0, 7.0]

# note: in the session below, collection holds [-1.0, 4.0, -8.0, 7.0], as the slices show
mid = len(collection) // 2
mid
2

collection[:mid]
[-1.0, 4.0]

collection[mid:]
[-8.0, 7.0]

collection[:2]
[-1.0, 4.0]

collection[2:]
[-8.0, 7.0]

left
[-1.0, 4.0]

right
[-8.0, 7.0]

left if left[0] <= right[0] else right
[-8.0, 7.0]

left if left[0] >= right[0] else right
[-1.0, 4.0]

(left if left[0] <= right[0] else right).pop(0)
-8.0

result = []
result.append((left if left[0] <= right[0] else right).pop(0))
result
[-8.0]
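
Running the loop to completion (same left and right as above) shows how the merge finishes: whatever survives in left or right is already sorted, so result + left + right appends it at the end:

left, right = [-1.0, 4.0], [-8.0, 7.0]
result = []
while left and right:
    result.append((left if left[0] <= right[0] else right).pop(0))

result
[-8.0, -1.0, 4.0]
result + left + right
[-8.0, -1.0, 4.0, 7.0]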

Python/sorts/normal_distribution_quick_sort.md

arr = [0, 10, 15, 3, 2, 9, 14, 13]
cur = len(arr)
arr[0:cur]
[0, 10, 15, 3, 2, 9, 14, 13]

max(arr[0:cur])
15

max(arr)
15

arr.index(max(arr[0:cur]))
2

[list() for _ in range(5)]
[[], [], [], [], []]

[list() for _ in range(5)][1]
[]

[list() for _ in range(5)][1].append(5)
[list() for _ in range(5)]   # each evaluation builds fresh lists, so the append above is not visible
[[], [], [], [], []]

buckets = [list() for _ in range(5)]
buckets
[[], [], [], [], []]

buckets[1].append(3)
buckets
[[], [3], [], [], []]
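
With that buckets pattern, a minimal bucket-sort sketch over the same arr as above. This is illustrative only; the bucket count and index formula here are my assumptions, not necessarily what bucket_sort.py uses:

def bucket_sort(arr, bucket_count=5):
    if not arr:
        return arr
    lo, hi = min(arr), max(arr)
    buckets = [list() for _ in range(bucket_count)]
    for x in arr:
        # map each value to a bucket by its relative position in [lo, hi]
        i = min(int((x - lo) / (hi - lo + 1) * bucket_count), bucket_count - 1)
        buckets[i].append(x)
    # sort each bucket and concatenate
    return [x for b in buckets for x in sorted(b)]

bucket_sort([0, 10, 15, 3, 2, 9, 14, 13])
[0, 2, 3, 9, 10, 13, 14, 15]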

Python/sorts/random_normal_distribution_quicksort.py

from tempfile import TemporaryFile
outfile = TemporaryFile()
outfile
<tempfile._TemporaryFileWrapper object at 0x036AC550>

outfile.seek(0)
0

import numpy as np

from random import randint
M = np.load(outfile)
Traceback (most recent call last):
  File "<pyshell#19>", line 1, in <module>
    M = np.load(outfile)
#### np.save(outfile, X) was never called, so the load(outfile) above cannot succeed
####### starting over

outfile = TemporaryFile()
p = 100 # 100 elements are to be sorted
mu, sigma = 0, 1 # mean and standard deviation
X = np.random.normal(mu, sigma, p)
np.save(outfile, X)
print(X)
[ 6.78274090e-01 -8.52825948e-01 -6.03075501e-01 -1.00420005e+00
-7.96532326e-01 -1.65382101e-01 1.43116450e-01 -1.97848426e-01
-1.33776608e+00 1.23098969e+00 -5.90892251e-01 2.25601202e-02
8.78504619e-01 -2.12425011e-01 -5.30371020e-02 4.95412246e-04
5.93632659e-01 1.57925727e+00 -1.56882239e-02 -6.29695805e-01
1.24938150e-01 8.80617366e-01 -7.99251066e-01 2.38656180e+00
3.32651204e-01 -3.57059719e-01 1.61777246e-01 -6.87525484e-01
8.93905681e-02 3.62200207e-01 4.91261620e-01 -5.67746134e-01
1.48533911e+00 4.29460700e-02 -5.28135571e-01 3.09401599e-01
1.45018211e+00 3.16740944e-03 -8.63631498e-01 6.35933960e-01
-7.16705078e-01 1.66224636e+00 9.34081343e-01 -1.32380505e+00
-4.16166793e-01 1.11065891e+00 -4.16605130e-03 2.71138500e-01
1.22826598e-01 -4.06749298e-01 6.62851533e-01 -7.70384316e-01
-4.32017494e-01 -2.44006823e-01 -1.21873650e+00 6.17939014e-01
-6.42422588e-01 -4.16299025e-01 -7.86703819e-01 -1.24358057e+00
4.44874275e-01 -4.30851030e-01 -9.17325632e-01 -7.53058105e-01
-1.01299996e+00 -4.14214642e-01 1.82587820e+00 -2.10552535e-01
2.06464913e-01 -1.30648658e+00 8.30463175e-01 1.36821577e-01
7.41007115e-02 -9.29753546e-01 -6.42642000e-01 -4.94151662e-02
-1.61395425e+00 9.24641894e-01 -1.80790634e+00 2.60945833e-02
2.51620478e-01 -1.37166492e+00 -5.44686268e-01 4.88666865e-01
1.04054454e+00 -4.53437729e-01 -4.76563488e-02 -5.37804304e-01
-7.80898952e-01 -2.14847616e-01 4.62686639e-01 6.81068567e-02
-1.61825081e+00 4.84324796e-02 7.45973514e-01 -5.55606619e-02
-3.45508822e-04 -2.06063368e+00 1.75935275e+00 1.08048387e+00]

outfile.seek(0) # using the same array
0

M = np.load(outfile)

M
array([ 6.78274090e-01, -8.52825948e-01, -6.03075501e-01, -1.00420005e+00,
-7.96532326e-01, -1.65382101e-01, 1.43116450e-01, -1.97848426e-01,
-1.33776608e+00, 1.23098969e+00, -5.90892251e-01, 2.25601202e-02,
8.78504619e-01, -2.12425011e-01, -5.30371020e-02, 4.95412246e-04,
5.93632659e-01, 1.57925727e+00, -1.56882239e-02, -6.29695805e-01,
1.24938150e-01, 8.80617366e-01, -7.99251066e-01, 2.38656180e+00,
3.32651204e-01, -3.57059719e-01, 1.61777246e-01, -6.87525484e-01,
8.93905681e-02, 3.62200207e-01, 4.91261620e-01, -5.67746134e-01,
1.48533911e+00, 4.29460700e-02, -5.28135571e-01, 3.09401599e-01,
1.45018211e+00, 3.16740944e-03, -8.63631498e-01, 6.35933960e-01,
-7.16705078e-01, 1.66224636e+00, 9.34081343e-01, -1.32380505e+00,
-4.16166793e-01, 1.11065891e+00, -4.16605130e-03, 2.71138500e-01,
1.22826598e-01, -4.06749298e-01, 6.62851533e-01, -7.70384316e-01,
-4.32017494e-01, -2.44006823e-01, -1.21873650e+00, 6.17939014e-01,
-6.42422588e-01, -4.16299025e-01, -7.86703819e-01, -1.24358057e+00,
4.44874275e-01, -4.30851030e-01, -9.17325632e-01, -7.53058105e-01,
-1.01299996e+00, -4.14214642e-01, 1.82587820e+00, -2.10552535e-01,
2.06464913e-01, -1.30648658e+00, 8.30463175e-01, 1.36821577e-01,
7.41007115e-02, -9.29753546e-01, -6.42642000e-01, -4.94151662e-02,
-1.61395425e+00, 9.24641894e-01, -1.80790634e+00, 2.60945833e-02,
2.51620478e-01, -1.37166492e+00, -5.44686268e-01, 4.88666865e-01,
1.04054454e+00, -4.53437729e-01, -4.76563488e-02, -5.37804304e-01,
-7.80898952e-01, -2.14847616e-01, 4.62686639e-01, 6.81068567e-02,
-1.61825081e+00, 4.84324796e-02, 7.45973514e-01, -5.55606619e-02,
-3.45508822e-04, -2.06063368e+00, 1.75935275e+00, 1.08048387e+00])
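
The loaded samples can then be fed to the sort under test. A minimal random-pivot quicksort sketch (illustrative; the actual code in the file may differ):

from random import randint

def quick_sort_random(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[randint(0, len(arr) - 1)]   # random pivot avoids worst-case splits on sorted input
    less = [x for x in arr if x < pivot]
    equal = [x for x in arr if x == pivot]
    greater = [x for x in arr if x > pivot]
    return quick_sort_random(less) + equal + quick_sort_random(greater)

sorted_M = quick_sort_random(list(M))   # ascending copy of the loaded samples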

Python/sorts/random_pivot_quick_sort.py

TEST_CASES = [
    {'input': [8, 7, 6, 5, 4, 3, -2, -5], 'expected': [-5, -2, 3, 4, 5, 6, 7, 8]},
    {'input': [-5, -2, 3, 4, 5, 6, 7, 8], 'expected': [-5, -2, 3, 4, 5, 6, 7, 8]},
    {'input': [5, 6, 1, 4, 0, 1, -2, -5, 3, 7], 'expected': [-5, -2, 0, 1, 1, 3, 4, 5, 6, 7]},
    {'input': [2, -2], 'expected': [-2, 2]},
    {'input': [1], 'expected': [1]},
    {'input': [], 'expected': []},
]

'''
TODO:
- Fix some broken tests in particular cases (such as []),
- Unify the input format: should always be function(input_collection) (no additional args)
- Unify the output format: should always be a collection instead of
  updating input elements and returning None (a possible wrapper is sketched below)
- Rewrite some algorithms in function format (in case there is no function definition)
'''
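
One way to tackle the output-format item: a hypothetical normalize() adapter (the name is my own, not from the repo) that copies the input and always returns a collection, whether the wrapped function sorts in place or returns a new list:

def normalize(sort_function):
    def wrapper(collection):
        copy = list(collection)        # never mutate the caller's input
        result = sort_function(copy)
        return copy if result is None else result   # in-place sorts typically return None
    return wrapper

# usage sketch: TEST_FUNCTIONS = [normalize(f) for f in TEST_FUNCTIONS]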

TEST_FUNCTIONS = [
    bogo_sort,
    bubble_sort,
    bucket_sort,
    cocktail_shaker_sort,
    comb_sort,
    counting_sort,
    cycle_sort,
    gnome_sort,
    heap_sort,
    insertion_sort,
    merge_sort_fastest,
    merge_sort,
    pancake_sort,
    quick_sort_3partition,
    quick_sort,
    radix_sort,
    quick_sort_random,
    selection_sort,
    shell_sort,
    tim_sort,
    topological_sort,
    tree_sort,
    wiggle_sort,
]

for function in TEST_FUNCTIONS:
    for case in TEST_CASES:
        result = function(case['input'])
        assert result == case['expected'], 'Executed function: {}, {} != {}'.format(
            function.__name__, result, case['expected'])

Python/sorts/tim_sort.py
# concatenating lists with + still yields a list

lst
[5, 9, 10, 3, -4, 5, 178, 92, 46, -18, 0, 7]

lst[:5]
[5, 9, 10, 3, -4]

lst[5:7]
[5, 178]

value = lst[1]
value
9

lst[:5] + lst[5:7] + [value]
[5, 9, 10, 3, -4, 5, 178, 9]
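
This slice-and-concatenate pattern is enough to insert a value into a sorted run; a minimal sketch (illustrative, the insertion used by tim_sort.py itself may differ):

def insert_into_run(run, value):
    for i, x in enumerate(run):
        if value < x:                        # first position where value fits
            return run[:i] + [value] + run[i:]
    return run + [value]                     # value is the largest, append it

insert_into_run([3, 5, 9, 10, 178], 9)
[3, 5, 9, 9, 10, 178]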

Python/sorts/topological_sort.py
