将A保留序号rn=(y-1)*len(n_list)+x-1,bit_value按2^rn计算,将c按rn排序,然后生成式中if c_row[‘bit’] > b_row[1] 条件可以改为for c_row in C[b_row[1]+1:],不做别的。
上述说明有一个错误,b_row[1]是二的幂,而不是序号,导致给出的代码有错,手工改过来了。
def main():
# with n as (select level n from dual connect by level<=5)
n_list = [i for i in range(1, 8)] # level从1开始
n_size = len(n_list)
# A as (select n1.n x, n2.n y, power(2,rownum-1) bit from n n1,n n2)
A = []
for x in n_list:
for y in n_list:
rn = (y - 1) * n_size + (x - 1) # 序号rn=(y-1)*len(n_list)+x-1
bit_value = 2 ** rn # bit_value按2^rn计算
A.append({'x': x, 'y': y, 'bit': bit_value, 'rn': rn})
# 按rn排序A
A.sort(key=lambda a: a['rn'])
# C as (select a1.bit,a1.x,a1.y, sum(a2.bit) bit2 from a a1,a a2 where (a1.x-a2.x)*(a1.y-a2.y)<=0 group by a1.bit,a1.x,a1.y)
C = []
for a1 in A:
bit2_sum = 0
for a2 in A:
if (a1['x'] - a2['x']) * (a1['y'] - a2['y']) <= 0:
bit2_sum += a2['bit']
C.append({
'bit': a1['bit'],
'x': a1['x'],
'y': a1['y'],
'bit2': bit2_sum,
'rn': a1['rn']
})
# 按rn排序C
C.sort(key=lambda c: c['rn'])
# 递归CTE b(cnt,bit,bit2) - 用列表代替字典
# 初始部分: select 1,c.bit,c.bit2 from c
b = [[1, c_row['bit'], c_row['bit2'], c_row['rn']] for c_row in C] #用第4个元素保存序号
# 递归部分: 模拟递归CTE
changed = True
while changed:
changed = False
# 用生成式代替两重for循环,使用切片优化
new_rows = [
[b_row[0] + 1, b_row[1] + c_row['bit'], b_row[2] & c_row['bit2'], c_row['rn']]
for b_row in b
for c_row in C[b_row[3] + 1:] # 使用切片代替条件判断
if (b_row[2] & c_row['bit']) > 0
]
if new_rows:
b = new_rows # 覆盖而不是扩展
changed = True
# 找到cnt最大的行: select * from (select b.*,rank() over(order by cnt desc) rnk from b) where rnk=1
if not b:
return
max_cnt = max(row[0] for row in b)
max_rows = [row for row in b if row[0] == max_cnt]
# 为每个最大集合生成点坐标字符串
results = []
for r in max_rows:
points = []
for a_row in A:
# where bitand(r.bit, a.bit) > 0
if r[1] & a_row['bit'] > 0:
points.append(f"({a_row['x']},{a_row['y']})")
# listagg within group(order by r.bit) - 这里按bit值排序可能没有意义,改为按坐标排序
points.sort()
results.append(''.join(points))
# 输出结果
print(f"最大集合大小: {max_cnt}")
print(f"找到 {len(results)} 个最大集合:")
#for i, result in enumerate(results, 1):
# print(f"{i}: {result}")
if __name__ == "__main__":
main()
大幅度减少了循环次数,只要前一版本1/3的时间。
C:\d>timer64 pypy/pypy sql2py6.py
鏈€澶ч泦鍚堝ぇ灏? 13
鎵惧埌 924 涓渶澶ч泦鍚?
Kernel Time = 0.046 = 13%
User Time = 0.296 = 87%
Process Time = 0.343 = 101% Virtual Memory = 127 MB
Global Time = 0.337 = 100% Physical Memory = 124 MB
C:\d>timer64 pypy/pypy sql2py5.py
鏈€澶ч泦鍚堝ぇ灏? 13
鎵惧埌 924 涓渶澶ч泦鍚?
Kernel Time = 0.031 = 3%
User Time = 0.718 = 71%
Process Time = 0.750 = 74% Virtual Memory = 114 MB
Global Time = 1.008 = 100% Physical Memory = 111 MB
C:\d>timer64 python sql2py6.py
最大集合大小: 13
找到 924 个最大集合:
Kernel Time = 0.046 = 5%
User Time = 0.765 = 84%
Process Time = 0.812 = 89% Virtual Memory = 99 MB
Global Time = 0.903 = 100% Physical Memory = 103 MB
C:\d>timer64 python sql2py5.py
最大集合大小: 13
找到 924 个最大集合:
Kernel Time = 0.015 = 0%
User Time = 2.468 = 98%
Process Time = 2.484 = 99% Virtual Memory = 96 MB
Global Time = 2.493 = 100% Physical Memory = 101 MB

1153

被折叠的 条评论
为什么被折叠?



