# 删除 Azure Data Lake Storage Gen2 中的 blob 对象
import os
import posixpath
import time

from azure.storage.blob import BlobServiceClient
from retrying import retry
class DirectoryClient:
    """Directory-style helper around a single blob container.

    Blob names are treated as '/'-separated paths, so a "directory" is simply
    a name prefix ending in '/'. All listing goes through
    ``self.client.list_blobs(name_starts_with=...)``.
    """

    # Number of blob names passed to one ``delete_blobs`` call.
    # NOTE(review): chosen to stay under the service's per-batch limit
    # (documented as 256 sub-requests) - confirm against the SDK docs.
    DELETE_BATCH_SIZE = 200

    def __init__(self, connection_string, container_name):
        """Create a container client for *container_name*.

        Args:
            connection_string: Storage account connection string.
            container_name: Name of the container to operate on.
        """
        service_client = BlobServiceClient.from_connection_string(connection_string)
        self.client = service_client.get_container_client(container_name)

    @staticmethod
    def _as_prefix(path):
        """Return *path* with a trailing '/' appended unless it is '' or already has one."""
        if path != '' and not path.endswith('/'):
            return path + '/'
        return path

    def ls_files(self, path, recursive=False):
        """List the blobs under *path*, as paths relative to *path*.

        Args:
            path: Directory-like prefix; '' means the container root.
            recursive: When false, only blobs directly under *path* are
                returned; when true, blobs at any depth are returned.

        Returns:
            A list of '/'-separated relative paths, in listing order.
        """
        prefix = self._as_prefix(path)
        files = []
        for blob in self.client.list_blobs(name_starts_with=prefix):
            # posixpath (not os.path): blob names always use '/', regardless
            # of the operating system this code runs on.
            relative_path = posixpath.relpath(blob.name, prefix)
            if recursive or '/' not in relative_path:
                files.append(relative_path)
        return files

    def ls_dirs(self, path, recursive=False):
        """List the sub-directories under *path*, relative to *path*.

        A directory is reported only if at least one listed blob sits directly
        inside it (it is derived from the parent of each blob's relative path).

        Args:
            path: Directory-like prefix; '' means the container root.
            recursive: When false, only first-level directories are returned;
                when true, nested directories are returned as well.

        Returns:
            A list of unique '/'-separated relative directory paths, in the
            order they are first encountered.
        """
        prefix = self._as_prefix(path)
        dirs = []
        seen = set()  # O(1) duplicate check; ``dirs`` keeps first-seen order
        for blob in self.client.list_blobs(name_starts_with=prefix):
            relative_dir = posixpath.dirname(posixpath.relpath(blob.name, prefix))
            if not relative_dir or relative_dir in seen:
                continue
            if recursive or '/' not in relative_dir:
                seen.add(relative_dir)
                dirs.append(relative_dir)
        return dirs

    def rm(self, path, recursive=False):
        """Delete the blob at *path*, or everything under it when *recursive*.

        Args:
            path: Blob name (non-recursive) or directory-like prefix (recursive).
            recursive: When true, delegate to :meth:`rmdir`.
        """
        if recursive:
            self.rmdir(path)
        else:
            print(f'Deleting {path} ')
            self.client.delete_blob(path)

    def rmdir(self, path):
        """Delete every blob whose name starts with *path* (as a directory prefix).

        Blobs are deleted in consecutive, non-overlapping batches of at most
        ``DELETE_BATCH_SIZE`` names, so each blob is submitted exactly once.
        Nothing is printed when no blob matches.

        Args:
            path: Directory-like prefix; a trailing '/' is added if missing.
        """
        prefix = self._as_prefix(path)
        # Use the exact names returned by the listing instead of rebuilding
        # them from relative paths.
        blob_names = [
            blob.name for blob in self.client.list_blobs(name_starts_with=prefix)
        ]
        if not blob_names:
            return

        batch_size = self.DELETE_BATCH_SIZE
        for start in range(0, len(blob_names), batch_size):
            self.client.delete_blobs(*blob_names[start:start + batch_size])
        print(prefix + ':blob 删除完成')

    def droptable(self, dbName, tableName):
        """Drop the table ``dbName.tableName`` if it exists, then print a confirmation.

        NOTE(review): relies on a global ``spark`` object that is not defined
        in the visible part of this file - presumably a SparkSession injected
        by the runtime (e.g. a notebook); confirm.
        The names are interpolated directly into the SQL text, so callers must
        pass trusted identifiers only.

        Args:
            dbName: Database (schema) name.
            tableName: Table name.
        """
        spark.sql(f'DROP TABLE IF EXISTS {dbName} . {tableName} ')
        print('{0}.{1} 删除完成'.format(dbName, tableName))
@retry(stop_max_attempt_number=20, wait_incrementing_increment=200)
def main():
    """Remove the storage files and the table definition for a fixed set of tables.

    For each table name, every blob under the same-named prefix in the
    'dwm-storage' container is deleted, then the table is dropped from the
    'dwm_dev' database. The whole function is wrapped by ``retry`` with at
    most 20 attempts, so a failure restarts the run from the first table.
    """
    # NOTE(review): the account key is embedded in source; consider loading
    # the connection string from configuration or a secret store instead.
    storage = DirectoryClient(
        connection_string='DefaultEndpointsProtocol=https;AccountName=aalsabddev01e2;AccountKey=xxxxxxxxxxxxxxxxxxx==;EndpointSuffix=core.chinacloudapi.cn',
        container_name='dwm-storage',
    )
    database = 'dwm_dev'
    tables_to_remove = [
        'm02_iap_track_logon_evt',
        'm02_iap_track_vist_evt',
        'm02_iap_track_page_clos_evt',
    ]
    for table_name in tables_to_remove:
        # Data files first, then the table definition.
        storage.rmdir(table_name)
        storage.droptable(database, table_name)