1. 迭代器都可以用for循环遍历,还可以用以下方式手动遍历:
# Manually drive the file iterator with next(); EOF raises StopIteration.
with open('/etc/passwd') as f:
    try:
        while True:
            line = next(f)
            print(line, end='')
    except StopIteration:
        pass
# Same loop, but pass a default to next() so EOF returns None instead of raising.
with open('/etc/passwd') as f:
    while True:
        line = next(f, None)
        if line is None:
            break
        print(line, end='')
next()函数支持接收第二个参数作为默认值:迭代耗尽时返回该默认值,而不是抛出StopIteration异常。
2. 自定义class要支持迭代,需实现__iter__()函数,使该函数返回一个迭代器(iterator),例如用iter()把迭代委托给内部容器
如:
class Node:
    """A tree node holding a value; iterating over a node yields its children."""

    def __init__(self, value):
        self._value = value
        self._children = []

    def __repr__(self):
        return 'Node({!r})'.format(self._value)

    def add_child(self, node):
        """Append *node* to this node's list of children."""
        self._children.append(node)

    def __iter__(self):
        # Delegate iteration to the child list, satisfying the iterator protocol.
        yield from self._children
# Example
if __name__ == '__main__':
    root = Node(0)
    child1 = Node(1)
    child2 = Node(2)
    root.add_child(child1)
    root.add_child(child2)
    # Iterating a Node yields its direct children via __iter__.
    for ch in root:
        print(ch)
    # Outputs Node(1), Node(2)
3. 生成器yield使用。
详情参见:https://blog.youkuaiyun.com/Code_LT/article/details/107166915
>>> def countdown(n):
... print('Starting to count from', n)
... while n > 0:
... yield n
... n -= 1
... print('Done!')
...
>>> # Create the generator, notice no output appears
>>> c = countdown(3)
>>> c
<generator object countdown at 0x1006a0af0>
>>> # Run to first yield and emit a value
>>> next(c)
Starting to count from 3
3
>>> # Run to the next yield
>>> next(c)
2
>>> # Run to next yield
>>> next(c)
1
>>> # Run to next yield (iteration stops)
>>> next(c)
Done!
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
StopIteration
>>>
next()作用下,到停止时,会报StopIteration错误,for循环的话会自动处理这个异常,不需要担心。
4. yield from用法
详见:https://www.jianshu.com/p/87da832730f5
如果生成器函数需要产出另一个生成器生成的值,传统的解决方法是使用嵌套的for循环:
>>> def chain(*iterables):
... for it in iterables:
... for i in it:
... yield i
>>> s = 'ABC'
>>> t = tuple(range(3))
>>> list(chain(s, t))
['A', 'B', 'C', 0, 1, 2]
chain 生成器函数把操作依次交给接收到的各个可迭代对象处理。
Python3.3之后引入了新语法:
>>> def chain(*iterables):
... for i in iterables:
... yield from i
...
>>> list(chain(s, t))
['A', 'B', 'C', 0, 1, 2]
yield from
相当于用来替代 for 循环。yield from x
表达式对 x 对象所做的第一件事是,调用 iter(x),从中获取迭代器。因
此,x 可以是任何可迭代的对象。
使用示例:
class Node:
    """Tree node supporting child iteration and a depth-first traversal."""

    def __init__(self, value):
        self._value = value
        self._children = []

    def __repr__(self):
        return 'Node({!r})'.format(self._value)

    def add_child(self, node):
        """Append *node* to this node's list of children."""
        self._children.append(node)

    def __iter__(self):
        return iter(self._children)

    def depth_first(self):
        """Yield this node first, then every descendant in pre-order."""
        yield self
        for child in self:
            # yield from re-emits everything the child's traversal produces.
            yield from child.depth_first()
if __name__ == '__main__':
    root = Node(0)
    child1 = Node(1)
    child2 = Node(2)
    root.add_child(child1)
    root.add_child(child2)
    child1.add_child(Node(3))
    child1.add_child(Node(4))
    child2.add_child(Node(5))
    # Pre-order traversal: a node is printed before its descendants.
    for ch in root.depth_first():
        print(ch)
    # Outputs Node(0), Node(1), Node(3), Node(4), Node(2), Node(5)
一个生成器函数体内可以有多个yield:每次next()都会运行到下一个yield处暂停并返回该处的值,如下例所示
def test():
    """Generator yielding two values per loop pass: 0,1, 1,2, 2,3, 3,4."""
    for n in range(4):
        yield n
        yield n + 1
t=test()
next(t)
Out[4]: 0
next(t)
Out[5]: 1
next(t)
Out[6]: 1
next(t)
Out[7]: 2
next(t)
Out[8]: 2
5. 使用reversed(a)时,若a不能直接反向迭代而需要先转成list(耗费性能),可为a实现__reversed__()函数,高效地支持逆序遍历
Reversed iteration only works if the object in question has a size that can be determined or if the object implements a __reversed__() special method.
>>> a = [1, 2, 3, 4]
>>> for x in reversed(a):
... print(x)
6. 可在__iter__()函数中做其他操作,比如记录iter的某段过程:
def __iter__(self):
    """Yield lines from self.lines, recording each (lineno, line) pair."""
    for lineno, line in enumerate(self.lines,1):
        self.history.append((lineno, line))  # history is a deque
        yield line
deque详解:https://zhuanlan.zhihu.com/p/32201189
7. Iterator 和 generator 不能直接切片,但可用如下方式:
>>> import itertools
>>> for x in itertools.islice(c, 10, 20):
... print(x)
...
islice()会返回一个iterator,但会消费掉原有iterator或generator中的数据,使得c不能被二次使用。
8. 跳过 Iterator 和 generator 的某些值
8.1 dropwhile(表达式, iterable):跳过开头连续使表达式为True的值,其后的所有元素原样产出,返回 Iterator
>>> from itertools import dropwhile
>>> with open('/etc/passwd') as f:
... for line in dropwhile(lambda line: line.startswith('#'), f):
... print(line, end='')
8.2 知道确切扔掉的数量时:
>>> from itertools import islice
>>> items = ['a', 'b', 'c', 1, 4, 10, 15]
>>> for x in islice(items, 3, None):
... print(x)
...
1
4
10
15
>>>
9.enumerate() 返回iterator
>>> my_list = ['a', 'b', 'c']
>>> for idx, val in enumerate(my_list):
... print(idx, val)
...
0 a
1 b
2 c
>>> my_list = ['a', 'b', 'c']
>>> for idx, val in enumerate(my_list, 1):
... print(idx, val)
...
1 a
2 b
3 c
10. zip() 返回iterator
>>> a = [1, 2, 3]
>>> b = ['w', 'x', 'y', 'z']
>>> for i in zip(a,b):
... print(i)
...
(1, 'w')
(2, 'x')
(3, 'y')
>>>
>>> from itertools import zip_longest
>>> for i in zip_longest(a,b):
... print(i)
...
(1, 'w')
(2, 'x')
(3, 'y')
(None, 'z')
>>> for i in zip_longest(a, b, fillvalue=0):
... print(i)
...
(1, 'w')
(2, 'x')
(3, 'y')
(0, 'z')
>>> a = [1, 2, 3]
>>> b = [10, 11, 12]
>>> c = ['x','y','z']
>>> for i in zip(a, b, c):
... print(i)
...
(1, 10, 'x')
(2, 11, 'y')
(3, 12, 'z')
11. chain() 用于遍历多个容器更高效,因为直接生成iterator 消费iterable输入。
>>> from itertools import chain
>>> a = [1, 2, 3, 4]
>>> b = ['x', 'y', 'z']
>>> for x in chain(a, b):
... print(x)
...
1
2
3
4
x
y
z
# 低效,因为+要先生成新序列
for x in a + b:
...
#高效
for x in chain(a, b):
...
12. 利用生成器构建pipeline,流式处理大批量数据,防止内存溢出
import os
import fnmatch
import gzip
import bz2
import re
def gen_find(filepat, top):
    '''
    Walk the directory tree rooted at *top*, yielding the full path of every
    file whose name matches the shell wildcard pattern *filepat*.
    '''
    for dirpath, _dirnames, filenames in os.walk(top):
        matching = fnmatch.filter(filenames, filepat)
        for filename in matching:
            yield os.path.join(dirpath, filename)
def gen_opener(filenames):
    '''
    Open a sequence of filenames one at a time, producing a file object.

    The file is closed when the consumer advances to the next iteration.
    The close now sits in a try/finally so the file is also closed if the
    consumer abandons the generator early (generator .close() / garbage
    collection raises GeneratorExit at the yield) — the original leaked
    the open handle in that case.
    '''
    for filename in filenames:
        # Choose the opener by extension; all paths produce text-mode files.
        if filename.endswith('.gz'):
            f = gzip.open(filename, 'rt')
        elif filename.endswith('.bz2'):
            f = bz2.open(filename, 'rt')
        else:
            f = open(filename, 'rt')
        try:
            yield f
        finally:
            f.close()
def gen_concatenate(iterators):
    '''
    Flatten a sequence of iterators into one continuous stream of items.
    '''
    for sub in iterators:
        # Delegate to each sub-iterator in turn until it is exhausted.
        yield from sub
def gen_grep(pattern, lines):
    '''
    Yield only those lines in which the regex *pattern* is found.
    '''
    regex = re.compile(pattern)  # compile once, reuse for every line
    for candidate in lines:
        if regex.search(candidate) is not None:
            yield candidate
# Usage: find every line containing the substring "python" (case-insensitive).
lognames = gen_find('access-log*', 'www')      # 1) matching filenames
files = gen_opener(lognames)                   # 2) open each file lazily
lines = gen_concatenate(files)                 # 3) flatten into one line stream
pylines = gen_grep('(?i)python', lines)        # 4) keep only matching lines
for line in pylines:
    print(line)
13. 用迭代器替代有结束条件的无限循环
CHUNKSIZE = 8192

def reader(s):
    """Read CHUNKSIZE-byte chunks from socket *s* until the peer closes.

    recv() returning b'' signals end-of-stream; each non-empty chunk is
    handed to process_data() (defined elsewhere by the caller).
    """
    while True:
        data = s.recv(CHUNKSIZE)
        if data == b'':
            break
        process_data(data)  # fixed: original line was missing the closing parenthesis
可替换为:
def reader(s):
    """Socket reader rewritten with two-argument iter().

    iter(callable, sentinel) calls the callable repeatedly until it returns
    the sentinel — here b'', meaning the peer closed the connection.
    """
    for chunk in iter(lambda: s.recv(CHUNKSIZE), b''):
        process_data(chunk)  # fixed: was process_data(data); `data` is undefined here
原因是iter()可接收一个不带参数的函数和一个结束值,迭代器反复调用该函数直到返回值为结束值时停止。
>>> import sys
>>> f = open('/etc/passwd')
>>> for chunk in iter(lambda: f.read(10), ''):
... n = sys.stdout.write(chunk)