I recently needed to store a large volume of files, and after evaluating several object-storage options I settled on MinIO. To make it easier to use from Python, I wrapped the third-party minio package in a utility class. Beyond the basic operations, it also supports multithreaded, chunked (multipart) file upload and download. Keep the part size modest: transfers are bounded by the machine's bandwidth, and parts that are too large can saturate the link and degrade the machine's performance. The code below is shared for learning purposes only; a minimal sketch of the ranged-download idea follows after the class.
import os
import io
from datetime import timedelta
from concurrent.futures import as_completed, ThreadPoolExecutor

from minio import Minio
from minio.error import S3Error
from minio.deleteobjects import DeleteObject
from tqdm import tqdm

class Bucket(object):
    _instance = None
    policy = '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:GetBucketLocation","s3:ListBucket"],"Resource":["arn:aws:s3:::%s"]},{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:GetObject"],"Resource":["arn:aws:s3:::%s/*"]}]}'

    def __new__(cls, *args, **kwargs):
        # Singleton: reuse one instance per process. A dedicated _instance
        # attribute avoids clashing with the Minio client stored on self.client.
        if not cls._instance:
            cls._instance = object.__new__(cls)
        return cls._instance

    def __init__(self, service, access_key, secret_key, secure=False, section_size=10, t_max=3):
        '''
        Constructor parameters
        :param service: server address (host:port)
        :param access_key: access_key
        :param secret_key: secret_key
        :param secure: use HTTPS if True
        :param section_size: part size in MB for chunked upload/download
        :param t_max: thread pool size
        '''
        self.service = service
        self.client = Minio(service, access_key=access_key, secret_key=secret_key, secure=secure)
        self.size = section_size * 1024 * 1024  # part size in bytes
        self.processPool = ThreadPoolExecutor(max_workers=t_max)

    def exists_bucket(self, bucket_name):
        """
        Check whether a bucket exists.
        :param bucket_name: bucket name
        :return: True if the bucket exists
        """
        return self.client.bucket_exists(bucket_name=bucket_name)

    def create_bucket(self, bucket_name: str, is_policy: bool = True):
        """
        Create a bucket and, optionally, attach a public-read policy.
        :param bucket_name: bucket name
        :param is_policy: attach the read-only policy if True
        :return: False if the bucket already exists, True after creation
        """
        if self.exists_bucket(bucket_name=bucket_name):
            return False
        self.client.make_bucket(bucket_name=bucket_name)
        if is_policy:
            policy = self.policy % (bucket_name, bucket_name)
            self.client.set_bucket_policy(bucket_name=bucket_name, policy=policy)
        return True
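
For reference, a minimal usage sketch; the endpoint and credentials below are placeholders, not real values:

# Usage sketch -- endpoint and credentials are placeholders.
bucket = Bucket("127.0.0.1:9000", access_key="minioadmin", secret_key="minioadmin")
if bucket.create_bucket("demo-bucket"):
    print("created demo-bucket with a public-read policy")
else:
    print("demo-bucket already exists")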
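
And here is the ranged-download idea mentioned in the introduction, as a hedged sketch of a method you could add to the class rather than the class's actual (omitted) implementation: stat the object, split it into self.size-byte ranges, and fetch each range on the thread pool through get_object's offset/length parameters. The method name download_concurrently and the write-in-place file layout are illustrative assumptions.

    def download_concurrently(self, bucket_name, object_name, file_path):
        # Illustrative sketch, not the original method: split the object into
        # self.size-byte ranges and fetch them in parallel on the thread pool.
        total = self.client.stat_object(bucket_name, object_name).size

        def fetch(offset):
            # get_object supports ranged reads via its offset/length parameters
            resp = self.client.get_object(bucket_name, object_name,
                                          offset=offset, length=min(self.size, total - offset))
            try:
                return offset, resp.read()
            finally:
                resp.close()
                resp.release_conn()

        futures = [self.processPool.submit(fetch, off) for off in range(0, total, self.size)]
        with open(file_path, "wb") as f:
            f.truncate(total)  # pre-size the file so parts can be written in place
            for fut in tqdm(as_completed(futures), total=len(futures)):
                offset, data = fut.result()
                f.seek(offset)
                f.write(data)

Writing each completed part at its own offset keeps memory bounded to roughly one part per worker thread, which is also why an oversized section_size ties up both bandwidth and memory.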