import time
# 4.1、手动遍历迭代器
# 通过next()和StopIteration异常,取代for
# Drive the iterator by hand: obtain it with iter(), then advance it with next().
items = [1, 2, 3]
it = iter(items)
# Consumes the first element (1); a further next() past the end raises StopIteration.
next(it)
1
# 4.2 代理迭代
# Usage of __iter__(), __str__() and __repr__()
class Node:
    """Tree node holding a value and an ordered list of child nodes.

    Iterating over a node delegates to its list of children.
    """

    def __init__(self, value):
        """Store ``value`` and start with no children."""
        self._children = []
        self._value = value

    def __repr__(self):
        """Return ``Node(<repr of value>)``."""
        template = 'Node({!r})'
        return template.format(self._value)

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        children = self._children
        children.append(node)

    def __iter__(self):
        """Return an iterator over the direct children."""
        children = self._children
        return iter(children)
# Example
if __name__ == '__main__':
    # Build a root with two children and iterate over it directly.
    root = Node(0)
    child1, child2 = Node(1), Node(2)
    root.add_child(child1)
    root.add_child(child2)
    # Prints Node(1) then Node(2): iteration is proxied to the children list.
    for ch in root:
        print(ch)
Node(1)
Node(2)
# 4.3 Creating new iteration patterns with generators
def frange(start, stop, increment):
    """Yield values from ``start`` up to (but excluding) ``stop``.

    Works like ``range`` but accepts non-integer steps. Each value is
    computed as ``start + k * increment`` instead of repeatedly adding
    ``increment`` to a running total, so floating-point rounding errors do
    not accumulate (repeated addition made ``frange(0, 1, 0.1)`` yield an
    extra value just below 1).

    Args:
        start: First value produced (yielded unchanged).
        stop: Exclusive upper bound.
        increment: Distance between consecutive values.

    Yields:
        Successive values ``start, start + increment, ...`` below ``stop``.
    """
    x = start
    steps = 0
    while x < stop:
        yield x
        steps += 1
        # Recompute from the origin so rounding error stays bounded.
        x = start + steps * increment
# Consume the generator with a plain for loop: prints 0, 0.5, ... 3.5.
for n in frange(0, 4, 0.5):
    print(n)
0
0.5
1.0
1.5
2.0
2.5
3.0
3.5
# 4.4 实现迭代器协议
class Node:
    """Tree node with a value, child nodes, and depth-first traversal."""

    def __init__(self, value):
        """Store ``value`` and start with no children."""
        self._children = []
        self._value = value

    def __repr__(self):
        """Return ``Node(<repr of value>)``."""
        template = 'Node({!r})'
        return template.format(self._value)

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        children = self._children
        children.append(node)

    def __iter__(self):
        """Return an iterator over the direct children."""
        children = self._children
        return iter(children)

    def depth_first(self):
        """Yield this node, then every descendant in pre-order."""
        yield self
        # Iterating over self walks the direct children (see __iter__).
        for child in self:
            yield from child.depth_first()
# Example
if __name__ == '__main__':
    # Build a two-level tree: 0 -> (1 -> (3, 4), 2 -> (5)).
    root = Node(0)
    child1, child2 = Node(1), Node(2)
    root.add_child(child1)
    root.add_child(child2)
    child1.add_child(Node(3))
    child1.add_child(Node(4))
    child2.add_child(Node(5))
    # Walk the whole tree in depth-first (pre-order) order.
    for ch in root.depth_first():
        print(ch)
# Outputs Node(0), Node(1), Node(3), Node(4), Node(2), Node(5)
Node(0)
Node(1)
Node(3)
Node(4)
Node(2)
Node(5)
# 4.5 Reverse iteration
# reversed,前提是对象实现了__reversed__()特殊方法,否则需要先转list
# 迭代对象过大时,转list会消耗大量内存,这时可以定义一个反向迭代器
class Countdown:
    """Iterable counting down from ``start`` to 1, with reverse iteration support."""

    def __init__(self, start):
        """Remember the value the countdown begins at."""
        self.start = start

    def __iter__(self):
        """Yield ``start, start - 1, ...`` while the value stays above 0."""
        current = self.start
        while True:
            if not current > 0:
                return
            yield current
            current -= 1

    def __reversed__(self):
        """Yield ``1, 2, ...`` up to and including ``start``."""
        current = 1
        while True:
            # self.start is re-read on every step, as in a plain while condition.
            if not current <= self.start:
                return
            yield current
            current += 1
# reversed() picks up Countdown.__reversed__: prints 1..5.
for rr in reversed(Countdown(5)):
    print(rr)
print('\n')
# Plain iteration uses Countdown.__iter__: prints 5..1.
for rr in Countdown(5):
    print(rr)
1
2
3
4
5
5
4
3
2
1
# 4.6、带有外部状态的生成器函数
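# Note: this class is an iterable, not an iterator; if you call next() on it directly instead of using a for loop it raises an error, so call iter() on it first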
from collections import deque
class linehistory:
    """Iterate over lines while remembering the most recent ones.

    ``history`` is a bounded deque of ``(lineno, line)`` pairs; line numbers
    start at 1 and at most ``histlen`` pairs are kept.
    """

    def __init__(self, lines, histlen=3):
        """Wrap the iterable ``lines`` and keep up to ``histlen`` recent entries."""
        self.history = deque(maxlen=histlen)
        self.lines = lines

    def __iter__(self):
        """Yield each line, recording ``(lineno, line)`` in ``history`` first."""
        for number, text in enumerate(self.lines, start=1):
            entry = (number, text)
            self.history.append(entry)
            yield text

    def clear(self):
        """Drop all remembered history entries."""
        self.history.clear()
# with open('somefile.txt') as f:
#     lines = linehistory(f)
lines = linehistory(['c', 'c++', 'java', 'python', 'php'])
for line in lines:
    if 'python' not in line:
        continue
    # On a match, print the retained history window as "lineno:text" pairs.
    for lineno, line in lines.history:
        print(f'{lineno}:{line}', end=' ')
2:c++ 3:java 4:python
# 4.7 迭代器切片
# 标准切片是不能实现
# 使用itertools.islice(),是左闭右开,该过程是不可逆的类似cursor和zip()
def count(n):
    """Yield ``n, n + 1, n + 2, ...`` without ever terminating."""
    current = n
    while True:
        yield current
        current += 1
c = count(0)
# c[10:20] would raise TypeError: generators do not support slice syntax
import itertools
# islice consumes and discards the first 10 items, then yields items 10..14.
for x in itertools.islice(c, 10, 15):
    print(x)
10
11
12
13
14
# 4.8 跳过可迭代对象的开始部分
# 适用于事先不知道大小的对象,生成器,文件等
# itertools.dropwhile 仅仅跳过开始满足测试条件的行,如果第一行匹配不到就结束
# 已知位置的时候用islice
from itertools import dropwhile
# with open('/etc/passwd') as f:
# for line in dropwhile(lambda line: line.startswith('#'),f):
# print(line, end='')
demo = """# Note that this file is consulted directly only when the system is running
# in single-user mode. At other times, this information is provided by
# Open Directory.
nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root:*:0:0:System Administrator:/var/root:/bin/sh
"""
# Split into lines; the trailing newline leaves one empty string at the end.
d = demo.split('\n')
# Pitfall: dropwhile only skips while the predicate holds from the very first
# element, so a leading empty string would stop the comment lines being skipped.
for line in dropwhile(lambda text: text.startswith('#'), d):
    print(line)
nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root:*:0:0:System Administrator:/var/root:/bin/sh
# 4.9 排列组合的迭代 即遍历一个集合中所有元素可能的排列和组合
# itertools.permutations yields tuples; an optional length argument may be given; order matters and no element is repeated within a result
# itertools.combinations 顺序不重要,在计算组合的时候,一旦元素被选取就会从候选中剔除掉
# itertools.combinations_with_replacement() 允许同一个元素被选择多次
from itertools import permutations, combinations, combinations_with_replacement
iters = ['a', 'b', 'c']
# All orderings of all three items, then of every pair.
for c in permutations(iters, 3):
    print('permutations: ', c)
for c in permutations(iters, 2):
    print('permutations: ', c)
# Order-insensitive selections of three items, then of two.
for c in combinations(iters, 3):
    print('____combinations: ', c)
for c in combinations(iters, 2):
    print('____combinations: ', c)
# Selections in which the same item may be picked more than once.
for c in combinations_with_replacement(iters, 3):
    print('cwr: ', c)
permutations: ('a', 'b', 'c')
permutations: ('a', 'c', 'b')
permutations: ('b', 'a', 'c')
permutations: ('b', 'c', 'a')
permutations: ('c', 'a', 'b')
permutations: ('c', 'b', 'a')
permutations: ('a', 'b')
permutations: ('a', 'c')
permutations: ('b', 'a')
permutations: ('b', 'c')
permutations: ('c', 'a')
permutations: ('c', 'b')
____combinations: ('a', 'b', 'c')
____combinations: ('a', 'b')
____combinations: ('a', 'c')
____combinations: ('b', 'c')
cwr: ('a', 'a', 'a')
cwr: ('a', 'a', 'b')
cwr: ('a', 'a', 'c')
cwr: ('a', 'b', 'b')
cwr: ('a', 'b', 'c')
cwr: ('a', 'c', 'c')
cwr: ('b', 'b', 'b')
cwr: ('b', 'b', 'c')
cwr: ('b', 'c', 'c')
cwr: ('c', 'c', 'c')
# 4.10 序列上索引值类型 即迭代的同时跟踪索引
# enumerate(), 接受开始的参数
# Default enumeration starts counting at 0.
for idx, val in enumerate(['a', 'b', 'c']):
    print(idx, val)
print('*' * 20)
# An explicit start value shifts the counter.
for idx, val in enumerate(['a', 'b', 'c'], 1):
    print(idx, val)
# Use the running index to point at the offending line in error messages.
demo = """# Note that this file is consulted directly only when the system is running
# in single-user mode. At other times, this information is provided by
# Open Directory.
nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root:*:0:0:System Administrator:/var/root:/bin/sh"""
d = demo.split('\n')
for lineno, line in enumerate(d):
    fields = line.split()
    try:
        count = int(fields[1])
    except ValueError as e:
        print(f'Line {lineno}: Parse error: {e}')
# When the items are tuples, unpack them with a nested target.
data = [(1, 2), (3, 4)]
for n, (x, y) in enumerate(data):
    print(n, x, y)
# error
# for n, x,y in enumerate(data):
# print(n, x,y)
0 a
1 b
2 c
********************
1 a
2 b
3 c
Line 0: Parse error: invalid literal for int() with base 10: 'Note'
Line 1: Parse error: invalid literal for int() with base 10: 'in'
Line 2: Parse error: invalid literal for int() with base 10: 'Open'
Line 3: Parse error: invalid literal for int() with base 10: 'User:/var/empty:/usr/bin/false'
Line 4: Parse error: invalid literal for int() with base 10: 'Administrator:/var/root:/bin/sh'
0 1 2
1 3 4
# 4.11 同时迭代多个序列
# zip()
# itertools.zip_longest() 可多次使用
headers = ['name', 'shares', 'price']
values = ['ACME', 100, 490.1]
from itertools import zip_longest
# zip pairs items up and stops at the shorter input.
for x, y in zip(headers, values):
    print(x, y)
# zip_longest runs to the longer input; run twice to show that calling it
# again on the same lists produces the pairs again.
for x, y in zip_longest(headers, values):
    print(x, y)
for x, y in zip_longest(headers, values):
    print(x, y)
name ACME
shares 100
price 490.1
name ACME
shares 100
price 490.1
name ACME
shares 100
price 490.1
# Materialise the pairs: as a header -> value mapping, then as a list of tuples.
print({key: value for key, value in zip(headers, values)})
print([*zip(headers, values)])
{'name': 'ACME', 'shares': 100, 'price': 490.1}
[('name', 'ACME'), ('shares', 100), ('price', 490.1)]
# 4.12 不同集合上元素的迭代
# 适用在不同集合,执行相同操作,减少重复循环
# itertools.chain() 比先将序列合并再迭代更高效
from itertools import chain
a = [1, 2, 3, 4]
b = ['x', 'y', 'z']
# chain walks a and then b without building a combined list first.
for x in chain(a, b):
    # Process item
    print(x)
1
2
3
4
x
y
z
# 4.13 创建数据处理管道
# 中间有使用到fnmatch.filter, endswith, os.walk, yield from等
# 然后就是yield生产者,for消费者模式
# 4.14 展开嵌套的序列,将多层嵌套展开为单层列表
# isinstance(x, ignore_types),防止字符串被迭代
# Iterable lives in collections.abc; the alias in collections was removed in Python 3.10.
from collections.abc import Iterable


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf elements of an arbitrarily nested iterable.

    Args:
        items: Iterable whose elements may themselves be iterables.
        ignore_types: Type or tuple of types that are yielded as-is even
            though they are iterable (by default ``str`` and ``bytes``, so
            text is not split into characters).

    Yields:
        Each non-iterable (or ignored-type) element, in left-to-right order.
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            # Pass ignore_types down so nested levels honour the caller's choice.
            yield from flatten(x, ignore_types)
        else:
            yield x
items = [1, 2, [3, 'sdfsdf', [5, 6], 7], 8]
# Produces 1 2 3 sdfsdf 5 6 7 8 (the string is kept whole, not split into characters)
for x in flatten(items):
    print(x)
1
2
3
sdfsdf
5
6
7
8
# 4.15 顺序迭代合并后的排序迭代对象
# heapq.merge(),可以对非常大的两个file进行操作,它是有可迭代特性
# heapq.merge() 需要所有输入序列必须是排过序,它只会检测开始最小的数
import heapq
a = [1, 4, 7, 10]
b = [2, 5, 6, 11]
# Both inputs are already sorted, which heapq.merge requires; it yields the
# merged sequence lazily.
for c in heapq.merge(a, b):
    print(c)
1
2
4
5
6
7
10
11
# 4.16 Replacing an infinite while loop with an iterator
# iter(callable, 标记值)
import sys
# Read the file in 10-character chunks until read() returns the sentinel.
# The file is opened in text mode, so end-of-file is signalled by the empty
# string ''; a bytes sentinel such as b'\n' never compares equal to a str
# chunk and the loop would never terminate.
with open('passwd.txt') as f:
    for chunk in iter(lambda: f.read(10), ''):
        # Bind the return value so an interactive session does not echo it.
        n = sys.stdout.write(chunk)
nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root:*:0:0:System Administrator:/var/root:/bin/sh
daemon:*:1:1:System Services:/var/root:/usr/bin/false
_uucp:*:4:4:Unix to Unix Copy Protocol:/var/spool/uucp:/usr/sbin/uucico
1nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root:*:0:0:System Administrator:/var/root:/bin/sh
daemon:*:1:1:System Services:/var/root:/usr/bin/false
_uucp:*:4:4:Unix to Unix Copy Protocol:/var/spool/uucp:/usr/sbin/uucico
2nobody::-2:-2:Unprivileged User:/var/empty:/usr/bin/false
root::0:0:System Administrator:/var/root:/bin/sh
daemon::1:1:System Services:/var/root:/usr/bin/false
_uucp::4:4:Unix to Unix Copy Protocol:/var/spool/uucp:/usr/sbin/uucico