import imaplib
import email
from email.header import decode_header
import requests
import os
from urllib.parse import urlparse
class EmailLargeAttachmentDownloader:
    """Find "large attachment" download links in recent e-mails and fetch them.

    The class logs in to a mailbox over IMAP, scans the text parts of the
    newest messages for download links (see ``LINK_PATTERNS``) and downloads
    every link it finds into ``save_dir`` using ``requests``.
    """

    # Regular expressions used to recognise download links in a message body.
    # Extend this tuple to support further mail providers.
    LINK_PATTERNS = (
        r'https?://[^\s"]+?(?:download|attach|large|file)[^\s"]*',  # URL containing a keyword
        r'https?://mail\.163\.com/[^\s"]+',  # NetEase (163) mail links
        r'https?://mail\.qq\.com/[^\s"]+',  # QQ mail links
        r'https?://drive\.google\.com/[^\s"]+',  # Google Drive links (Gmail)
    )

    def __init__(self, email_addr, password, imap_server, save_dir="large_attachments"):
        """Store the connection settings and create the download directory.

        :param email_addr: mailbox address (e.g. xxx@163.com)
        :param password: mailbox authorization code (not the login password)
        :param imap_server: IMAP host (163: imap.163.com; QQ: imap.qq.com;
            Gmail: imap.gmail.com)
        :param save_dir: directory the downloaded files are written to
        """
        self.email_addr = email_addr
        self.password = password
        self.imap_server = imap_server
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)  # make sure the target directory exists

    def decode_subject(self, subject):
        """Decode an RFC 2047 encoded subject header into a plain string.

        :param subject: raw ``Subject`` header value, or ``None`` when the
            message has no subject
        :return: the decoded subject; ``""`` when ``subject`` is ``None``
        """
        if subject is None:
            # Messages without a Subject header yield None from msg["Subject"].
            return ""
        fragments = []
        for part, encoding in decode_header(subject):
            if isinstance(part, bytes):
                try:
                    part = part.decode(encoding or "utf-8", errors="ignore")
                except LookupError:
                    # Charset label unknown to Python: fall back to UTF-8.
                    part = part.decode("utf-8", errors="ignore")
            fragments.append(part)
        return "".join(fragments)

    @staticmethod
    def _safe_filename(name):
        """Replace characters that are unsafe in file names with ``_``."""
        import re

        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name).strip(" .")
        return cleaned or "attachment"

    @staticmethod
    def _is_http_url(link):
        """Return True when ``link`` parses as an http(s) URL."""
        try:
            return urlparse(link).scheme in ("http", "https")
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. a broken IPv6 host).
            return False

    def get_large_attachment_links(self, msg):
        """Extract large-attachment download links from a message.

        Only ``text/plain`` and ``text/html`` parts are scanned. Links found
        in HTML parts are entity-unescaped (``&amp;`` -> ``&``) so they can be
        requested as-is.

        :param msg: an ``email.message.Message`` instance
        :return: list of unique http(s) links in order of first appearance
        """
        import html
        import re

        patterns = [re.compile(pattern) for pattern in self.LINK_PATTERNS]
        links = []
        # Walk every part of the message (body, attachment descriptions, ...).
        for part in msg.walk():
            content_type = part.get_content_type()
            if content_type not in ("text/plain", "text/html"):
                continue
            try:
                payload = part.get_payload(decode=True)
                if payload is None:
                    continue
                charset = part.get_content_charset() or "utf-8"
                try:
                    content = payload.decode(charset, errors="ignore")
                except LookupError:
                    content = payload.decode("utf-8", errors="ignore")
                for pattern in patterns:
                    for match in pattern.findall(content):
                        link = match.strip('"\'')
                        if content_type == "text/html":
                            link = html.unescape(link)
                        links.append(link)
            except Exception as e:
                print(f"提取链接失败:{e}")
        # De-duplicate while keeping a deterministic order, then drop
        # anything that is not an http(s) URL.
        unique_links = list(dict.fromkeys(links))
        return [link for link in unique_links if self._is_http_url(link)]

    def download_from_link(self, link, email_subject):
        """Download one link into ``save_dir``, resuming a partial file if possible.

        :param link: URL to download
        :param email_subject: subject of the message the link came from; used
            to build a file name when the URL does not contain one
        :return: path of the saved file, or ``None`` when the download failed
        """
        try:
            # Derive the file name from the URL, or from the subject as a fallback.
            filename = urlparse(link).path.split("/")[-1]
            if not filename or "." not in filename:
                filename = f"{email_subject}_{len(os.listdir(self.save_dir)) + 1}.bin"
            filename = self._safe_filename(filename)
            save_path = os.path.join(self.save_dir, filename)
            print(f"开始下载附件:{filename}(链接:{link})")

            # Resume: ask only for the bytes that are still missing.
            resume_from = os.path.getsize(save_path) if os.path.exists(save_path) else 0
            headers = {"Range": f"bytes={resume_from}-"} if resume_from else {}

            with requests.get(link, headers=headers, stream=True, timeout=30,
                              allow_redirects=True) as response:
                if resume_from and response.status_code == 416:
                    # Requested range starts at/after the end of the resource:
                    # the local file is treated as already complete.
                    print(f"\n附件下载完成:{save_path}")
                    return save_path
                response.raise_for_status()  # raise on HTTP errors (404, 500, ...)

                # Append only when the server honoured the Range header (206).
                # A plain 200 carries the whole file, so start over.
                resumed = bool(resume_from) and response.status_code == 206
                downloaded = resume_from if resumed else 0
                total_size = int(response.headers.get("Content-Length", 0))
                if total_size:
                    total_size += downloaded

                # Stream to disk in chunks to keep memory usage low.
                with open(save_path, "ab" if resumed else "wb") as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):  # 1 MB
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded += len(chunk)
                        progress = (downloaded / total_size) * 100 if total_size > 0 else 0
                        print(f"\r下载进度:{progress:.2f}%({downloaded}/{total_size} 字节)", end="")
            print(f"\n附件下载完成:{save_path}")
            return save_path
        except Exception as e:
            print(f"下载失败(链接:{link}):{e}")
            return None

    def run(self, max_emails=10):
        """Log in, scan the newest messages and download every link found.

        :param max_emails: number of most recent INBOX messages to inspect;
            values <= 0 inspect nothing
        """
        try:
            # The context manager logs out even when an error occurs below.
            with imaplib.IMAP4_SSL(self.imap_server) as imap:
                # 1. Log in.
                imap.login(self.email_addr, self.password)
                print(f"成功登录邮箱:{self.email_addr}")
                # 2. Select the inbox.
                imap.select("INBOX")
                # 3. Search (ALL: every message; UNSEEN would be unread only).
                status, data = imap.search(None, "ALL")
                all_ids = data[0].split() if status == "OK" and data and data[0] else []
                # Note: a plain [-max_emails:] would return everything for 0.
                email_ids = all_ids[-max_emails:] if max_emails > 0 else []
                print(f"找到 {len(email_ids)} 封邮件,开始提取超大附件...")
                # 4. Process each message.
                for email_id in email_ids:
                    status, data = imap.fetch(email_id, "(RFC822)")
                    if status != "OK" or not data or not isinstance(data[0], tuple):
                        continue  # nothing usable was returned for this id
                    msg = email.message_from_bytes(data[0][1])
                    subject = self.decode_subject(msg["Subject"])
                    print(f"\n=== 处理邮件:{subject}(ID:{email_id.decode()})===")
                    links = self.get_large_attachment_links(msg)
                    if not links:
                        print("未找到超大附件下载链接")
                        continue
                    for link in links:
                        self.download_from_link(link, subject)
                # 5. Close the selected mailbox; logout happens on exit.
                imap.close()
            print("\n所有邮件处理完成!")
        except Exception as e:
            print(f"程序执行失败:{e}")
if __name__ == "__main__":
    # Script entry point. Replace the placeholder settings below with the
    # values for your own mailbox before running.
    EMAIL_ADDR, PASSWORD = "your_email@163.com", "your_auth_code"  # address / authorization code
    IMAP_SERVER = "imap.163.com"  # 163: imap.163.com; QQ: imap.qq.com; Gmail: imap.gmail.com
    SAVE_DIR = "my_large_attachments"  # directory the attachments are saved to

    # Build the downloader and process the 20 most recent messages.
    downloader = EmailLargeAttachmentDownloader(
        EMAIL_ADDR,
        PASSWORD,
        IMAP_SERVER,
        save_dir=SAVE_DIR,
    )
    downloader.run(20)
import xbot
from xbot import print, sleep
from . import package
from .package import variables as glv
import poplib
import email
import os
import time
from datetime import datetime
from email.header import decode_header
def main(args):
    """Entry-point placeholder: accepts ``args`` and does nothing."""
    pass
def email_config(keyword, email_date, save_file_path, max_search_count=10):
    """Log in to the POP3 mailbox and download the matching attachment.

    Credentials are read from ``glv["EMAIL_USER"]`` / ``glv["EMAIL_PWD"]``.

    :param keyword: keyword part of the subject to look for
    :param email_date: date part of the subject to look for
    :param save_file_path: full path the attachment is written to
    :param max_search_count: how many of the newest messages to inspect
    :return: ``{"status": "success", "files": [...]}`` when a file was saved,
        ``{"status": "no_files", "message": ...}`` when nothing matched, or
        ``{"status": "error", "message": ...}`` when the login failed
    """
    # Mailbox settings (named differently from the function to avoid
    # shadowing it inside its own body).
    pop3_settings = {
        'email': glv["EMAIL_USER"],
        'password': glv["EMAIL_PWD"],
        'pop3_server': 'pop.exmail.qq.com',
        'pop3_port': 995
    }

    server = login_email(pop3_settings)
    if not server:
        return {"status": "error", "message": "邮箱登录失败"}

    try:
        files = download_attachments(
            server,
            keyword,
            save_file_path,
            email_date,
            subject_filter=['【EB正式 乐其】'],
            max_search_count=max_search_count
        )
        if files:
            print(f"成功下载文件到:{files[0]}")
            return {"status": "success", "files": files}
        print(f"在最新的{max_search_count}封邮件中未找到匹配的附件")
        return {"status": "no_files", "message": f"在最新的{max_search_count}封邮件中未找到匹配的附件"}
    finally:
        try:
            server.quit()
        except (poplib.error_proto, OSError):
            # Best effort: a failure while saying goodbye to the server must
            # not replace the result computed above.
            pass
def _decode_header_bytes(raw, charset):
    """Decode one header fragment, trying ``charset``, then GB18030, then UTF-8."""
    for codec in (charset or 'utf-8', 'gb18030'):
        try:
            return raw.decode(codec)
        except (LookupError, UnicodeDecodeError):
            # Unknown charset label or bytes that do not fit it: try the next.
            continue
    return raw.decode('utf-8', 'ignore')


def decode_str(s):
    """Decode an RFC 2047 encoded mail header value into a string.

    Every fragment returned by ``decode_header`` is decoded and the results
    are concatenated, so headers that mix plain and encoded text (for example
    ``Re: =?utf-8?b?...?=``) are returned in full and always as ``str``.

    :param s: raw header value; ``None`` is treated as an empty header
    :return: the decoded text
    """
    if s is None:
        return ''
    pieces = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            value = _decode_header_bytes(value, charset)
        pieces.append(value)
    return ''.join(pieces)
# Log in to the POP3 mail server
def login_email(config):
    """Open an authenticated POP3-over-SSL connection.

    :param config: dict with ``'pop3_server'``, ``'email'``, ``'password'``
        and optionally ``'pop3_port'`` (defaults to 995)
    :return: the logged-in ``poplib.POP3_SSL`` object, or ``None`` when the
        connection or authentication failed
    """
    server = None
    try:
        server = poplib.POP3_SSL(config['pop3_server'], config.get('pop3_port', 995))
        server.user(config['email'])
        server.pass_(config['password'])
        msg_count, _ = server.stat()
        print(f"登录成功: {config['email']}, 邮件数量: {msg_count}")
        return server
    except poplib.error_proto as e:
        print(f"认证失败: {e}")
    except Exception as e:
        print(f"连接异常: {e}")

    # Failure path: do not leave a half-open connection behind.
    if server is not None:
        try:
            server.close()
        except OSError:
            pass
    return None
def download_attachments(server, keyword, save_file_path, email_date, subject_filter=None, max_search_count=10):
    """Save the first attachment of the newest message whose subject matches.

    A message matches when its subject contains ``"{keyword}-{email_date}"``
    and, if ``subject_filter`` is given, at least one of the filter strings.
    Messages are checked from newest to oldest.

    :param server: logged-in POP3 connection (``stat`` and ``retr`` are used)
    :param keyword: keyword part of the subject to look for
    :param save_file_path: full path the attachment is written to
    :param email_date: date part of the subject to look for
    :param subject_filter: optional list of strings; one must occur in the subject
    :param max_search_count: how many of the newest messages to inspect
    :return: ``[save_file_path]`` when an attachment was saved, else ``[]``
        (also on any error, which is printed)
    """
    try:
        msg_count, _ = server.stat()
        # Subject fragment to look for: "<keyword>-<date>".
        target_subject = f"{keyword}-{email_date}"
        # Never look at more messages than the mailbox holds.
        check_count = min(max_search_count, msg_count)
        print(f"开始搜索最新的{check_count}封邮件...")

        # POP3 numbers messages from 1; the highest number is the newest.
        for i in range(msg_count, msg_count - check_count, -1):
            _, lines, _ = server.retr(i)
            msg = email.message_from_bytes(b'\n'.join(lines))

            subject = decode_str(msg.get('Subject', ''))
            if isinstance(subject, bytes):
                # Guard against a bytes result so the substring tests below
                # cannot raise TypeError.
                subject = subject.decode('utf-8', 'ignore')
            print(f"检查邮件 {i}: {subject}")

            if target_subject not in subject:
                continue
            if subject_filter and not any(kw in subject for kw in subject_filter):
                continue

            # Create the target directory; a bare file name has no directory
            # part and os.makedirs('') would raise.
            save_dir = os.path.dirname(save_file_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)

            # Look for the first part that carries a file name and a payload.
            for part in msg.walk():
                filename = part.get_filename()
                if not filename:
                    continue
                payload = part.get_payload(decode=True)
                if payload is None:
                    # No decodable content: skip without truncating the target file.
                    continue
                filename = decode_str(filename)
                print(f"找到附件: {filename},将保存为: {save_file_path}")
                with open(save_file_path, 'wb') as f:
                    f.write(payload)
                print(f"附件已保存到: {save_file_path}")
                # Only the first attachment of the matching message is saved.
                return [save_file_path]

        # Nothing matched within the inspected messages.
        return []
    except Exception as e:
        print(f"下载附件错误: {e}")
        return []
# 需求说明:请结合以上两段代码,帮我在下面的代码中增加下载超大附件的功能。