One day I needed to export some files from a bucket in object storage, but s3cmd could not even ls it because the bucket was too large. I switched to boto to list the object names and then download them; notes below.
import boto
import boto.s3.connection


class CephS3():
    def __init__(self):
        access_key = 'your-access-key'
        secret_key = 'your-secret-key'
        IP = '192.168.1.1'
        PORT = 7480
        self.conn = boto.connect_s3(
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            host=IP,
            port=PORT,
            is_secure=False,  # plain HTTP, no SSL verification
            calling_format=boto.s3.connection.OrdinaryCallingFormat(),
        )

    def get_bucket(self):
        # list every bucket with its creation date
        for bucket in self.conn.get_all_buckets():
            print(f"{bucket.name}\t{bucket.creation_date}")

    def list_bucket_file(self):
        # list the object names in the given bucket
        bucket = self.conn.get_bucket('resbucket0')
        for key in bucket.list():
            print(key.name)

    def set_public_permission(self, bucket_name='test'):
        """Set the bucket and every object in it to public-read."""
        bucket = self.conn.get_bucket(bucket_name)
        # first set the ACL on the bucket itself
        try:
            bucket.set_acl('public-read')
            print(f"bucket {bucket_name} is now public-read")
        except Exception as e:
            print(f"failed to set bucket ACL: {e}")
        # then walk every object and set its ACL
        count = 0
        for key in bucket.list():
            try:
                key.set_acl('public-read')
                count += 1
            except Exception as e:
                print(f"failed to set ACL on {key.name}: {e}")
        print(f"done, set public-read on {count} objects")


obj = CephS3()
obj.get_bucket()
obj.list_bucket_file()
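
The actual goal was to download the listed objects. Here is a minimal sketch of that step with boto2, assuming a local target directory ./dump (the directory and the download_bucket helper are my additions; get_contents_to_filename is boto2's call for writing an object's body to a local file):

import os

def download_bucket(conn, bucket_name='resbucket0', dest_dir='./dump'):
    """Download every object in bucket_name into dest_dir (a sketch, not battle-tested)."""
    bucket = conn.get_bucket(bucket_name)
    for key in bucket.list():
        if key.name.endswith('/'):
            continue  # skip directory placeholder keys
        local_path = os.path.join(dest_dir, key.name)
        # recreate the key's directory structure locally
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        key.get_contents_to_filename(local_path)
        print(f"downloaded {key.name}")

Called as download_bucket(obj.conn) after the listing above.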
The same tooling rewritten with boto3: list_objects_v2 pagination copes with large buckets, and the per-object ACL calls run in a thread pool.

# -*- coding:utf-8 -*-
import logging
from concurrent.futures import ThreadPoolExecutor

import boto3
from botocore.exceptions import ClientError

# configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CephS3:
    def __init__(self):
        access_key = 'accesskey'
        secret_key = 'secretkey'
        endpoint_url = 'http://cephfile.test.com'
        self.s3_client = boto3.client(
            's3',
            endpoint_url=endpoint_url,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key
        )
        self.s3_resource = boto3.resource(
            's3',
            endpoint_url=endpoint_url,
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key
        )

    def get_buckets(self):
        """List all buckets."""
        try:
            response = self.s3_client.list_buckets()
            for bucket in response['Buckets']:
                logger.info(f"{bucket['Name']}\t{bucket['CreationDate']}")
        except ClientError as e:
            logger.error(f"failed to list buckets: {e}")

    def list_bucket_files(self, bucket_name='resbucket0'):
        """List the objects in the given bucket, paginated for buckets with more than 1000 keys."""
        try:
            paginator = self.s3_client.get_paginator('list_objects_v2')
            for page in paginator.paginate(Bucket=bucket_name):
                for obj in page.get('Contents', []):
                    logger.info(obj['Key'])
        except ClientError as e:
            logger.error(f"failed to list objects: {e}")

    def set_public_permission(self, bucket_name='test', max_workers=10):
        """Set the bucket and every object in it to public-read."""
        # set the ACL on the bucket itself
        try:
            self.s3_client.put_bucket_acl(Bucket=bucket_name, ACL='public-read')
            logger.info(f"bucket {bucket_name} is now public-read")
        except ClientError as e:
            logger.error(f"failed to set bucket ACL: {e}")
        # walk the objects and set each ACL from a thread pool
        try:
            bucket = self.s3_resource.Bucket(bucket_name)

            def set_object_acl(obj):
                try:
                    self.s3_client.put_object_acl(Bucket=bucket_name, Key=obj.key, ACL='public-read')
                    # obj.Acl().put(ACL='public-read')  # resource-style alternative
                    return True
                except ClientError as e:
                    logger.error(f"failed to set ACL on {obj.key}: {e.response['Error']}")
                    return False

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [executor.submit(set_object_acl, obj) for obj in bucket.objects.all()]
                # tally the results instead of mutating a shared counter from worker threads
                count = sum(1 for future in futures if future.result())
            logger.info(f"done, set public-read on {count} objects")
        except ClientError as e:
            logger.error(f"error while iterating objects: {e}")


if __name__ == '__main__':
    ceph = CephS3()
    # ceph.get_buckets()
    # ceph.list_bucket_files(bucket_name='test')
    ceph.set_public_permission(bucket_name='resbucket9', max_workers=10)
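
To spot-check that the ACLs actually took effect, a small helper like the one below can be used (my addition; get_object_acl is the standard boto3 client call, and the AllUsers grantee URI is the fixed S3 identifier for anonymous readers):

def is_public_read(s3_client, bucket_name, key):
    """Return True if the object's ACL grants READ to the AllUsers group."""
    acl = s3_client.get_object_acl(Bucket=bucket_name, Key=key)
    all_users = 'http://acs.amazonaws.com/groups/global/AllUsers'
    return any(
        grant['Permission'] == 'READ' and grant['Grantee'].get('URI') == all_users
        for grant in acl['Grants']
    )

For example, is_public_read(ceph.s3_client, 'resbucket9', 'some-key') should return True for any key once set_public_permission has run.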