import pandas as pd
import quopri
import re
from chardet import detect
from opencc import OpenCC
# Initialize the OpenCC converter; the 't2s' profile converts Traditional Chinese to Simplified Chinese.
cc = OpenCC('t2s')
def to_simplified(text):
    """Convert Traditional Chinese characters in ``text`` to Simplified Chinese.

    Args:
        text: The string to convert. May be empty or ``None``.

    Returns:
        The result of ``cc.convert(text)`` (``cc`` is the module-level OpenCC
        converter), or ``''`` when ``text`` is falsy.
    """
    if not text:
        # Nothing to convert; skip the converter entirely.
        return ''
    return cc.convert(text)
def decode_quoted_printable(text, charset='utf-8'):
    """Decode a Quoted-Printable payload into text, trying several encodings.

    The payload is Quoted-Printable-decoded once, and the resulting bytes are
    then decoded strictly with ``charset``, ``'utf-8'`` and ``'gb18030'`` in
    that order. If none of them fits, the bytes are decoded as ``'gbk'`` with
    undecodable bytes replaced, so the function never raises on bad data.

    Args:
        text: The Quoted-Printable payload as ``str`` or ``bytes``.
        charset: The encoding declared for the payload (for example a vCard
            ``CHARSET=`` parameter). An unknown or empty charset is skipped
            rather than raising.

    Returns:
        The decoded string, or ``''`` when ``text`` is empty or ``None``.
    """
    if not text:
        return ''

    declared = charset or 'utf-8'

    if isinstance(text, str):
        # quopri rejects str input containing non-ASCII characters with
        # ValueError, so always hand it bytes.
        try:
            payload = text.encode(declared)
        except (LookupError, UnicodeEncodeError):
            payload = text.encode('utf-8', errors='replace')
    else:
        payload = bytes(text)

    raw = quopri.decodestring(payload)

    # Strict attempts first: a strict failure is the only reliable signal
    # that a candidate encoding is wrong.
    for codec in (declared, 'utf-8', 'gb18030'):
        try:
            return raw.decode(codec)
        except (LookupError, UnicodeDecodeError):
            continue

    # Last resort: never fail, mark undecodable bytes instead.
    return raw.decode('gbk', errors='replace')
# Parameter patterns searched in the header part (before ':') of a content line.
_QP_PARAM = re.compile(r'ENCODING=QUOTED-PRINTABLE', re.IGNORECASE)
_CHARSET_PARAM = re.compile(r'CHARSET=([^;:,]+)', re.IGNORECASE)


def _unfold_vcard_lines(raw_lines):
    """Yield logical vCard content lines (bytes) with continuations merged.

    Two kinds of continuation are joined: a Quoted-Printable soft line break
    (the logical line so far ends with ``=``) and a folded line (the next
    physical line starts with a space or tab). Blank lines are skipped.
    """
    pending = b''
    for raw in raw_lines:
        physical = raw.rstrip()
        if pending:
            header = pending.partition(b':')[0].upper()
            if pending.endswith(b'=') and b'ENCODING=QUOTED-PRINTABLE' in header:
                # Soft line break: drop the '=' and glue the next line on, so
                # multi-byte characters split across lines are reassembled.
                pending = pending[:-1] + physical.strip()
                continue
            if physical[:1] in (b' ', b'\t'):
                # Folded line: the single leading whitespace is the fold marker.
                pending += physical[1:]
                continue
            yield pending
        pending = physical.strip()
    if pending:
        yield pending


def _declared_charset(header):
    """Return the ``CHARSET=`` parameter value of a header, or ``None``."""
    found = _CHARSET_PARAM.search(header)
    return found.group(1).strip() if found else None


def _decode_vcard_value(value, charset=None):
    """Decode a raw (non Quoted-Printable) property value to stripped text.

    The declared charset and UTF-8 are tried strictly first; only when both
    fail is the chardet guess used, with undecodable bytes replaced.
    """
    for codec in (charset, 'utf-8'):
        if not codec:
            continue
        try:
            return value.decode(codec).strip()
        except (LookupError, UnicodeDecodeError):
            continue
    guessed = detect(value)['encoding'] or 'utf-8'
    try:
        return value.decode(guessed, errors='replace').strip()
    except LookupError:
        return value.decode('latin1', errors='replace').strip()


def parse_vcard_file(file_path):
    """Extract name/phone rows from a vCard (.vcf) file.

    The file is read as bytes and continuation lines are merged before any
    decoding. ``FN`` lines set the current contact name (Quoted-Printable
    values go through ``decode_quoted_printable``); every ``TEL``/``TELX``
    line with a non-empty value produces one row. The name is converted with
    ``to_simplified``; rows without a known name use ``"未知联系人"``.

    The current name is kept for all phone numbers of a card and is cleared
    at ``BEGIN:VCARD`` / ``END:VCARD`` so it cannot leak into the next card.

    Args:
        file_path: Path of the vCard file to read.

    Returns:
        A list of dicts with the keys ``'Name'`` and ``'Phone'``, in file
        order. Each added row is also printed.
    """
    contacts = []
    current_name = None
    with open(file_path, 'rb') as f:
        for content_line in _unfold_vcard_lines(f):
            header_bytes, colon, value_bytes = content_line.partition(b':')
            if not colon:
                # Not a "name[;params]:value" line; nothing to extract.
                continue
            # Property names and parameters are ASCII; only values need a charset.
            header = header_bytes.decode('ascii', errors='replace').strip()
            prop = header.split(';', 1)[0].strip().lower()
            value_bytes = value_bytes.strip()

            if prop in ('begin', 'end') and value_bytes.lower() == b'vcard':
                current_name = None
            elif prop == 'fn':
                charset = _declared_charset(header)
                if _QP_PARAM.search(header):
                    # Underscores are literal in Quoted-Printable, so the value
                    # is passed through untouched.
                    if value_bytes:
                        current_name = decode_quoted_printable(
                            value_bytes, charset or 'utf-8'
                        )
                else:
                    current_name = _decode_vcard_value(value_bytes, charset)
            elif prop in ('tel', 'telx'):
                phone = _decode_vcard_value(value_bytes, _declared_charset(header))
                if not phone:
                    continue
                simplified_name = to_simplified(current_name) if current_name else "未知联系人"
                contacts.append({
                    'Name': simplified_name,
                    'Phone': phone
                })
                print(f"已添加联系人:{simplified_name} - {phone}")
    return contacts
# Hard-coded Windows paths: the input vCard file and the Excel workbook to create.
vcf_path = r"C:\Users\Lenovo\Desktop\00004.vcf"
output_excel = r"C:\Users\Lenovo\Desktop\谢忠发的电话目录.xlsx"
# Parse the vCard file into a list of {'Name', 'Phone'} dicts.
contacts = parse_vcard_file(vcf_path)
# Write the rows to a sheet named 'Contacts', without the DataFrame index column.
df = pd.DataFrame(contacts)
df.to_excel(output_excel, index=False, sheet_name='Contacts')
print(f"成功导出至 {output_excel}")
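# NOTE: The handling of garbled text (mojibake) is not fully sorted out yet; if you have a working solution, please share it in the comments.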