import logging
import shlex
import subprocess
# 配置日志记录
# Root logger setup: INFO and above are appended to hdfs_operations.log.
logging.basicConfig(
    filename='hdfs_operations.log',
    filemode='a',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
def run_command(command):
    """Run an external command, log and print its output, and report the result.

    Args:
        command: Either a command line given as ``str``, which is executed
            through the shell, or a sequence of arguments, which is executed
            directly without a shell (so no quoting is needed).

    Returns:
        The captured standard output as text when the command exits with
        status 0, or ``None`` when it exits with a non-zero status. A
        non-zero exit is logged and printed here and is not re-raised, so the
        return value is the only way for a caller to detect that failure.

    Raises:
        OSError: If the program cannot be started at all (for example a
            missing executable when ``command`` is an argument sequence).
    """
    # A string keeps the shell semantics; an argument sequence bypasses the
    # shell entirely, which avoids quoting problems and shell injection.
    use_shell = isinstance(command, str)
    logging.info(f"Executing command: {command}")
    try:
        result = subprocess.run(
            command,
            shell=use_shell,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        logging.error(f"Error executing command: {e}")
        logging.error(f"Error details: {e.stderr}")
        print(f"Error executing command: {e}")
        print(e.stderr)
        return None
    logging.info(f"Command output: {result.stdout}")
    print(result.stdout)
    return result.stdout
# 1. 基本文件系统操作
# 列出目录内容
def list_directory(hdfs_path):
    """List the contents of an HDFS directory via ``hdfs dfs -ls``.

    Args:
        hdfs_path: HDFS path to list. It is shell-quoted, so spaces and shell
            metacharacters in the path are passed through literally.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    try:
        # Quote the argument: the command line is interpreted by a shell.
        run_command(f"hdfs dfs -ls {shlex.quote(str(hdfs_path))}")
    except Exception as e:
        logging.error(f"Failed to list directory {hdfs_path}: {e}")
# 创建新目录
def create_directory(hdfs_path):
    """Create an HDFS directory via ``hdfs dfs -mkdir``.

    Args:
        hdfs_path: HDFS path of the directory to create. It is shell-quoted,
            so spaces and shell metacharacters are passed through literally.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    try:
        # Quote the argument: the command line is interpreted by a shell.
        run_command(f"hdfs dfs -mkdir {shlex.quote(str(hdfs_path))}")
    except Exception as e:
        logging.error(f"Failed to create directory {hdfs_path}: {e}")
# 上传本地文件到 HDFS
def upload_file(local_path, hdfs_path):
    """Upload a local file to HDFS via ``hdfs dfs -put``.

    Args:
        local_path: Path of the local source file.
        hdfs_path: HDFS destination path.

    Both paths are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        source = shlex.quote(str(local_path))
        destination = shlex.quote(str(hdfs_path))
        run_command(f"hdfs dfs -put {source} {destination}")
    except Exception as e:
        logging.error(f"Failed to upload file {local_path} to {hdfs_path}: {e}")
# 从 HDFS 下载文件到本地
def download_file(hdfs_path, local_path):
    """Download a file from HDFS to the local filesystem via ``hdfs dfs -get``.

    Args:
        hdfs_path: HDFS source path.
        local_path: Local destination path.

    Both paths are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        source = shlex.quote(str(hdfs_path))
        destination = shlex.quote(str(local_path))
        run_command(f"hdfs dfs -get {source} {destination}")
    except Exception as e:
        logging.error(f"Failed to download file {hdfs_path} to {local_path}: {e}")
# 删除文件或目录
def delete_path(hdfs_path, recursive=False):
    """Delete an HDFS file or directory via ``hdfs dfs -rm``.

    Args:
        hdfs_path: HDFS path to delete. It is shell-quoted, so spaces and
            shell metacharacters are passed through literally.
        recursive: When true, ``-r`` is added to the command.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    try:
        rm_flags = "-rm -r" if recursive else "-rm"
        # Quote the argument: the command line is interpreted by a shell.
        run_command(f"hdfs dfs {rm_flags} {shlex.quote(str(hdfs_path))}")
    except Exception as e:
        logging.error(f"Failed to delete path {hdfs_path}: {e}")
# 查看文件内容
def view_file(hdfs_path):
    """Show the contents of an HDFS file via ``hdfs dfs -cat``.

    Args:
        hdfs_path: HDFS path of the file to display. It is shell-quoted, so
            spaces and shell metacharacters are passed through literally.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    try:
        # Quote the argument: the command line is interpreted by a shell.
        run_command(f"hdfs dfs -cat {shlex.quote(str(hdfs_path))}")
    except Exception as e:
        logging.error(f"Failed to view file {hdfs_path}: {e}")
# 2. 文件系统检查和维护
# 检查文件系统健康状况
def check_hdfs_health():
    """Run ``hdfs fsck`` on ``/`` with the files, blocks and locations options.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    fsck_command = "hdfs fsck / -files -blocks -locations"
    try:
        run_command(fsck_command)
    except Exception as exc:
        logging.error(f"Failed to check HDFS health: {exc}")
# 查看 HDFS 集群报告
def hdfs_report():
    """Run ``hdfs dfsadmin -report`` to produce the cluster report.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    report_command = "hdfs dfsadmin -report"
    try:
        run_command(report_command)
    except Exception as exc:
        logging.error(f"Failed to get HDFS report: {exc}")
# 进入或退出安全模式
def set_safemode(mode):
    """Enter or leave HDFS safe mode via ``hdfs dfsadmin -safemode``.

    Args:
        mode: ``"enter"`` or ``"leave"``. Any other value is rejected: an
            error is logged and no command is run (previously such a value
            was silently ignored).

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    if mode not in ("enter", "leave"):
        logging.error(
            f"Failed to set safemode to {mode}: unsupported mode, "
            "expected 'enter' or 'leave'"
        )
        return
    try:
        # mode is one of the two validated literals, so no quoting is needed.
        run_command(f"hdfs dfsadmin -safemode {mode}")
    except Exception as e:
        logging.error(f"Failed to set safemode to {mode}: {e}")
# 设置配额
def set_quota(hdfs_path, quota):
    """Set a quota on an HDFS directory via ``hdfs dfsadmin -setQuota``.

    Args:
        hdfs_path: HDFS directory the quota applies to.
        quota: Quota value; it is converted with ``str`` and passed as the
            first argument after ``-setQuota``.

    Both values are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        quota_arg = shlex.quote(str(quota))
        path_arg = shlex.quote(str(hdfs_path))
        run_command(f"hdfs dfsadmin -setQuota {quota_arg} {path_arg}")
    except Exception as e:
        logging.error(f"Failed to set quota for {hdfs_path}: {e}")
def clear_quota(hdfs_path):
    """Clear the quota of an HDFS directory via ``hdfs dfsadmin -clrQuota``.

    Args:
        hdfs_path: HDFS directory whose quota is cleared. It is shell-quoted,
            so spaces and shell metacharacters are passed through literally.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    try:
        # Quote the argument: the command line is interpreted by a shell.
        run_command(f"hdfs dfsadmin -clrQuota {shlex.quote(str(hdfs_path))}")
    except Exception as e:
        logging.error(f"Failed to clear quota for {hdfs_path}: {e}")
# 3. 集群管理
# 平衡 HDFS 集群中的数据
def balance_hdfs():
    """Run ``hdfs balancer``.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    balancer_command = "hdfs balancer"
    try:
        run_command(balancer_command)
    except Exception as exc:
        logging.error(f"Failed to balance HDFS: {exc}")
# 刷新节点配置
def refresh_nodes():
    """Run ``hdfs dfsadmin -refreshNodes``.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    refresh_command = "hdfs dfsadmin -refreshNodes"
    try:
        run_command(refresh_command)
    except Exception as exc:
        logging.error(f"Failed to refresh nodes: {exc}")
# 4. 访问控制和权限管理
# 更改文件或目录权限
def change_permission(hdfs_path, permission):
    """Change the permission of an HDFS path via ``hdfs dfs -chmod``.

    Args:
        hdfs_path: HDFS file or directory to modify.
        permission: Permission specification; it is passed to ``-chmod`` as a
            single argument.

    Both values are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        mode_arg = shlex.quote(str(permission))
        path_arg = shlex.quote(str(hdfs_path))
        run_command(f"hdfs dfs -chmod {mode_arg} {path_arg}")
    except Exception as e:
        logging.error(f"Failed to change permission for {hdfs_path}: {e}")
# 更改文件或目录所有者
def change_owner(hdfs_path, owner, group):
    """Change the owner and group of an HDFS path via ``hdfs dfs -chown``.

    Args:
        hdfs_path: HDFS file or directory to modify.
        owner: New owner name.
        group: New group name.

    The ``owner:group`` pair and the path are shell-quoted, so spaces and
    shell metacharacters are passed through literally. Any exception escaping
    ``run_command`` is logged and not propagated.
    """
    try:
        # owner and group are joined first so they stay one argument.
        ownership_arg = shlex.quote(f"{owner}:{group}")
        path_arg = shlex.quote(str(hdfs_path))
        run_command(f"hdfs dfs -chown {ownership_arg} {path_arg}")
    except Exception as e:
        logging.error(f"Failed to change owner for {hdfs_path}: {e}")
# 设置文件副本数
def set_replication(hdfs_path, replication):
    """Set the replication factor of an HDFS path via ``hdfs dfs -setrep -w``.

    Args:
        hdfs_path: HDFS file or directory to modify.
        replication: Replication value; it is converted with ``str`` and
            passed as the argument following ``-w``.

    Both values are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        replication_arg = shlex.quote(str(replication))
        path_arg = shlex.quote(str(hdfs_path))
        run_command(f"hdfs dfs -setrep -w {replication_arg} {path_arg}")
    except Exception as e:
        logging.error(f"Failed to set replication for {hdfs_path}: {e}")
# 5. 快照管理
# 创建快照
def create_snapshot(hdfs_path, snapshot_name):
    """Create a snapshot of an HDFS directory via ``hdfs dfs -createSnapshot``.

    Args:
        hdfs_path: HDFS directory to snapshot.
        snapshot_name: Name given to the new snapshot.

    Both values are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        path_arg = shlex.quote(str(hdfs_path))
        name_arg = shlex.quote(str(snapshot_name))
        run_command(f"hdfs dfs -createSnapshot {path_arg} {name_arg}")
    except Exception as e:
        logging.error(f"Failed to create snapshot for {hdfs_path}: {e}")
# 删除快照
def delete_snapshot(hdfs_path, snapshot_name):
    """Delete a snapshot of an HDFS directory via ``hdfs dfs -deleteSnapshot``.

    Args:
        hdfs_path: HDFS directory the snapshot belongs to.
        snapshot_name: Name of the snapshot to delete.

    Both values are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        path_arg = shlex.quote(str(hdfs_path))
        name_arg = shlex.quote(str(snapshot_name))
        run_command(f"hdfs dfs -deleteSnapshot {path_arg} {name_arg}")
    except Exception as e:
        logging.error(f"Failed to delete snapshot for {hdfs_path}: {e}")
# 列出快照
def list_snapshots():
    """List the snapshottable directories via ``hdfs lsSnapshottableDir``.

    ``lsSnapshottableDir`` is a subcommand of ``hdfs`` itself, not an option
    of ``hdfs dfs``, so it is invoked directly.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    try:
        run_command("hdfs lsSnapshottableDir")
    except Exception as e:
        logging.error(f"Failed to list snapshots: {e}")
# 6. 数据完整性和恢复
# 列出损坏的文件
def list_corrupt_files():
    """Run ``hdfs fsck / -list-corruptfileblocks``.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    corrupt_listing_command = "hdfs fsck / -list-corruptfileblocks"
    try:
        run_command(corrupt_listing_command)
    except Exception as exc:
        logging.error(f"Failed to list corrupt files: {exc}")
# 恢复损坏的文件
def recover_file(source_path, target_path):
    """Copy ``source_path`` to ``target_path`` via ``hdfs dfs -cp``.

    Args:
        source_path: Path to copy from.
        target_path: Path to copy to.

    Both paths are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    try:
        source = shlex.quote(str(source_path))
        target = shlex.quote(str(target_path))
        run_command(f"hdfs dfs -cp {source} {target}")
    except Exception as e:
        logging.error(f"Failed to recover file from {source_path} to {target_path}: {e}")
# 7. 高级操作
# 格式化 NameNode(谨慎操作)
def format_namenode():
    """Run ``hdfs namenode -format``.

    The original author marks this as an operation to use with caution.
    Any exception escaping ``run_command`` is logged and not propagated.
    """
    format_command = "hdfs namenode -format"
    try:
        run_command(format_command)
    except Exception as exc:
        logging.error(f"Failed to format NameNode: {exc}")
# 启动和停止 HDFS 守护进程
def start_hdfs():
    """Run the ``start-dfs.sh`` script.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    start_script = "start-dfs.sh"
    try:
        run_command(start_script)
    except Exception as exc:
        logging.error(f"Failed to start HDFS: {exc}")
def stop_hdfs():
    """Run the ``stop-dfs.sh`` script.

    Any exception escaping ``run_command`` is logged and not propagated.
    """
    stop_script = "stop-dfs.sh"
    try:
        run_command(stop_script)
    except Exception as exc:
        logging.error(f"Failed to stop HDFS: {exc}")
# 8. 日志和监控
# 查看 NameNode 或 DataNode 日志
def view_logs(level, node_type):
    """Run ``hdfs --loglevel <level> <node_type>``.

    Args:
        level: Value passed to ``--loglevel``.
        node_type: ``hdfs`` subcommand to run at that log level.

    Both values are shell-quoted, so spaces and shell metacharacters are
    passed through literally. Any exception escaping ``run_command`` is
    logged and not propagated.
    """
    # NOTE(review): this looks like it launches the given hdfs subcommand at
    # the requested log level rather than displaying existing log files --
    # confirm this is the intended behaviour.
    try:
        level_arg = shlex.quote(str(level))
        node_arg = shlex.quote(str(node_type))
        run_command(f"hdfs --loglevel {level_arg} {node_arg}")
    except Exception as e:
        logging.error(f"Failed to view logs for {node_type} at level {level}: {e}")
if __name__ == "__main__":
    # Example invocations, run in order. An unexpected exception from any
    # step stops the remaining steps and is logged.
    try:
        example_steps = (
            (list_directory, ("/user/hadoop/directory",)),
            (create_directory, ("/user/hadoop/new_directory",)),
            (upload_file, ("/local/path/file.txt", "/user/hadoop/directory/")),
            (hdfs_report, ()),
        )
        for operation, operation_args in example_steps:
            operation(*operation_args)
    except Exception as exc:
        logging.error(f"Unexpected error in main: {exc}")
# HDFS用于维护、监控和操作的命令python实现
# 最新推荐文章于 2025-06-08 02:52:26 发布