Python 全量备份 MongoDB 数据库(导出为 JSON 文件)
2025-12-02
import os
import json
from pymongo import MongoClient
from bson import ObjectId, datetime
from typing import Optional, Dict, List
class MongoJSONEncoder(json.JSONEncoder):
    """JSON encoder for MongoDB values that the stock encoder cannot serialize.

    Conversions performed by :meth:`default`:

    - ``ObjectId``          -> its ``str()`` form
    - ``datetime.datetime`` -> ``"YYYY-MM-DD HH:MM:SS"``
    - ``datetime.date``     -> ``"YYYY-MM-DD"``
    - ``bytes``             -> UTF-8 text, undecodable bytes are dropped
    - ``set`` / ``frozenset`` -> list

    Anything else is delegated to ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``."""
        if isinstance(obj, ObjectId):
            return str(obj)

        # datetime.datetime is a subclass of datetime.date, so the more
        # specific type has to be tested first.
        if isinstance(obj, datetime.datetime):
            return f"{obj:%Y-%m-%d %H:%M:%S}"
        if isinstance(obj, datetime.date):
            return f"{obj:%Y-%m-%d}"

        if isinstance(obj, bytes):
            return obj.decode("utf-8", errors="ignore")

        if isinstance(obj, (set, frozenset)):
            return [*obj]

        # Unknown type: defer to the base implementation.
        return super().default(obj)
def _export_collection(col, json_file: str, batch_size: int, indent: int) -> None:
    """Stream every document of ``col`` into ``json_file`` as a single JSON array."""
    # json.dump(indent=0) still emits one newline per key; only indent=None
    # produces the compact form that the public docstring promises for 0.
    json_indent = indent if indent else None
    with open(json_file, "w", encoding="utf-8") as f:
        f.write("[")
        # A single cursor is iterated instead of paging with skip()/limit():
        # unsorted skip/limit pages re-scan the skipped documents on every
        # query and may repeat or miss documents if their order changes
        # between queries. batch_size only bounds how many documents are
        # fetched per round trip, so memory use stays bounded.
        for index, doc in enumerate(col.find().batch_size(batch_size)):
            if index:
                f.write(",")
            json.dump(
                doc,
                f,
                cls=MongoJSONEncoder,
                ensure_ascii=False,
                indent=json_indent,
            )
            if json_indent is None:
                # Compact mode: keep one document per line.
                f.write("\n")
        f.write("]")


def export_mongodb_to_json(
    mongo_uri: str = "mongodb://localhost:27017/",
    username: Optional[str] = None,
    password: Optional[str] = None,
    auth_db: str = "admin",
    output_root: str = "./mongodb_export",
    batch_size: int = 1000,
    indent: int = 2,
) -> None:
    """Export all non-system MongoDB databases and collections to JSON files.

    One directory is created per database under ``output_root`` and one
    ``<collection>.json`` file (a JSON array of documents) per collection.
    The ``admin``, ``local`` and ``config`` databases and collections whose
    name starts with ``system.`` are skipped. Progress is reported with
    ``print``.

    Args:
        mongo_uri: MongoDB connection URI (defaults to a local server).
        username: Authentication user name (optional).
        password: Authentication password (optional). Credentials are only
            applied when both ``username`` and ``password`` are given.
        auth_db: Authentication database (defaults to ``admin``).
        output_root: Root directory for the exported files.
        batch_size: Number of documents fetched from the server per round
            trip, which bounds memory use on large collections.
        indent: JSON indentation width; ``0`` selects compact output with
            one document per line.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Connection failures are reported on stdout and make the function return
    early; errors raised while exporting propagate to the caller after the
    client has been closed.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    client_kwargs = {}
    if username and password:
        client_kwargs["username"] = username
        client_kwargs["password"] = password
        client_kwargs["authSource"] = auth_db

    client = None
    try:
        client = MongoClient(mongo_uri, **client_kwargs)
        # The MongoClient constructor does not block or connect; a cheap
        # command is needed to surface connection/auth errors here.
        client.admin.command("ping")
    except Exception as e:
        print(f"❌ MongoDB 连接失败: {str(e)}")
        if client is not None:
            client.close()
        return
    print(f"✅ 成功连接 MongoDB: {mongo_uri}")

    try:
        os.makedirs(output_root, exist_ok=True)
        print(f"📂 导出根目录: {os.path.abspath(output_root)}")

        system_dbs = {"admin", "local", "config"}
        all_dbs = [db for db in client.list_database_names() if db not in system_dbs]
        if not all_dbs:
            print("⚠️ 未找到非系统数据库")
            return
        print(f"📊 发现 {len(all_dbs)} 个非系统数据库: {all_dbs}")

        for db_name in all_dbs:
            db_dir = os.path.join(output_root, db_name)
            os.makedirs(db_dir, exist_ok=True)
            print(f"\n========== 正在导出数据库: {db_name} (目录: {db_dir}) ==========")

            db = client[db_name]
            collections = [
                col for col in db.list_collection_names() if not col.startswith("system.")
            ]
            if not collections:
                print(f"⚠️ 数据库 {db_name} 无有效集合,跳过")
                continue

            for col_name in collections:
                col = db[col_name]
                total_count = col.count_documents({})
                print(f"📄 集合 {col_name} (共 {total_count} 条数据)")
                json_file = os.path.join(db_dir, f"{col_name}.json")
                _export_collection(col, json_file, batch_size, indent)
                print(f"✅ 导出完成: {json_file}")
    finally:
        # Release the connection even when an export step raises.
        client.close()

    print(f"\n🎉 所有数据库导出完成!根目录: {os.path.abspath(output_root)}")
if __name__ == "__main__":
    # Script entry point: export settings for this run, passed to
    # export_mongodb_to_json as keyword arguments.
    CONFIG = dict(
        mongo_uri="mongodb://192.168.28.99:27017/",
        username=None,
        password=None,
        auth_db="admin",
        output_root="./mongodb_export",
        batch_size=1000,
        indent=4,
    )
    export_mongodb_to_json(**CONFIG)