Scrapy 的 images pipeline 默认会将图片转换成通用的格式（JPG）和模式（RGB）。
I came across this thread. Here's my MyImagesPipeline, which downloads full-size GIF images by overriding image_downloaded.
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that stores full-size GIFs from the raw response body.

    ``image_downloaded`` is overridden so that, for keys starting with
    ``'full'``, an image recognised as a GIF is written to disk from
    ``response.body`` unchanged instead of going through
    ``self.store.persist_image``. Every other image is persisted through
    the store as before.
    """

    # Accepted values of ``image.info['version']``. Both ``str`` and
    # ``bytes`` spellings are listed because the image library may report
    # the signature as bytes on Python 3 -- TODO confirm against the
    # Pillow version in use.
    _GIF_VERSIONS = ('GIF89a', 'GIF87a', b'GIF89a', b'GIF87a')

    def check_gif(self, image):
        """Return True if *image* looks like a GIF.

        :param image: image object exposing ``format`` and an ``info`` dict.
        :returns: True when ``image.format`` is ``'GIF'`` or when
            ``image.info['version']`` is a GIF87a/GIF89a signature.
        """
        if image.format == 'GIF':
            return True
        # Fall back to the version signature for images whose ``format``
        # is not 'GIF' (presumably lost when the image was converted --
        # verify against get_images()).
        return image.info.get('version') in self._GIF_VERSIONS

    def persist_gif(self, key, data, info):
        """Write the raw bytes *data* to the store location for *key*.

        :param key: storage key of the image; ``'.gif'`` is appended to it.
        :param data: raw bytes to write (the untouched response body).
        :param info: pipeline info object, forwarded to ``self.store._mkdir``.
        """
        # NOTE(review): the suffix is appended to the whole key, so a key
        # that already carries an extension ends up with two (for example
        # '<name>.jpg.gif'). Kept as-is so existing stored paths do not
        # change -- confirm whether replacing the extension was intended.
        key = key + '.gif'
        absolute_path = self.store._get_filesystem_path(key)
        self.store._mkdir(os.path.dirname(absolute_path), info)
        # Context manager guarantees the file is flushed and closed even
        # if the write fails; 'b' because the payload is binary.
        with open(absolute_path, 'wb') as f:
            f.write(data)

    def image_downloaded(self, response, request, info):
        """Persist every image produced for *response* and return a checksum.

        :param response: downloaded response; its ``body`` is saved verbatim
            for full-size GIFs.
        :param request: request that produced *response*.
        :param info: pipeline info object passed through to the store.
        :returns: ``md5sum`` of the first buffer yielded by ``get_images``,
            or ``None`` if nothing was yielded.
        """
        checksum = None
        for key, image, buf in self.get_images(response, request, info):
            if checksum is None:
                # Only the first yielded buffer contributes to the checksum.
                buf.seek(0)
                checksum = md5sum(buf)
            if key.startswith('full') and self.check_gif(image):
                # Save gif from response directly.
                self.persist_gif(key, response.body, info)
            else:
                self.store.persist_image(key, image, buf, info)
        return checksum