使用Python实现自动抓取微信联系人源代码
这是我基于 Python 自动化库 uiautomation 实现的、使用 PC 版微信抓取微信联系人的源代码,可供参考使用。
# coding=utf-8
import uiautomation as auto
from typing import Generator
import psutil
from win32com.client import Dispatch
class WxContactsSpider:
    """Scrape the contact list of the WeChat PC client through UI Automation.

    The spider drives the WeChat main window with mouse and keyboard input
    (through the ``uiautomation`` package, imported as ``auto``) and yields
    one dict per friend or group chat from :meth:`run`.
    """

    def __init__(self):
        # Top-level WeChat window and the contact list control inside it.
        self._wx = auto.WindowControl(
            ClassName="WeChatMainWndForPC", Name="微信", searchDepth=1)
        self._lxr_control = self._wx.ListControl(Name="联系人")
        # Names of list items that must never be reported as contacts.
        self._exclusive_items = {"新的朋友", "公众号"}
        # Captions treated as section boundaries while scanning the list:
        # three fixed captions plus the letters A-Z.
        self._node_box = {"星标朋友", "公众号", "群聊"}
        self._node_box.update({chr(i) for i in range(ord('A'), ord('Z') + 1)})
        # Names of list items found below the "群聊" caption.
        self._group_box = set()

    def _add_exclusive_items(self, contact_control: "auto.ListControl") -> None:
        """Collect names of currently listed items that are not contacts.

        Two kinds of items are added to ``self._exclusive_items``: every item
        that follows the "企业号" caption up to the next caption found in
        ``self._node_box``, and, outside of that section, every item whose
        own name differs from the name of its first text control.

        :param contact_control: list control whose children are scanned
        """
        self._wx.SwitchToThisWindow()
        in_enterprise_section = False
        for item in contact_control.GetChildren():
            if item.ControlType == auto.ControlType.PaneControl:
                continue
            name = item.TextControl().Name
            if name == "企业号":
                in_enterprise_section = True
            elif in_enterprise_section:
                if name in self._node_box:
                    # The next section caption ends the "企业号" section.
                    in_enterprise_section = False
                    continue
                self._exclusive_items.add(item.Name)
            elif item.Name != name:
                self._exclusive_items.add(item.Name)

    def _add_group_elems(self, contact_control: "auto.ListControl") -> None:
        """Collect names of the items listed below the "群聊" caption.

        Scanning stops at the first caption found in ``self._node_box`` after
        the "群聊" caption; the names are added to ``self._group_box``.

        :param contact_control: list control whose children are scanned
        """
        self._wx.SwitchToThisWindow()
        in_group_section = False
        for item in contact_control.GetChildren():
            if item.ControlType == auto.ControlType.PaneControl:
                continue
            name = item.TextControl().Name
            if name == "群聊":
                in_group_section = True
            elif in_group_section:
                if name in self._node_box:
                    break
                self._group_box.add(item.Name)

    def _get_current_item_name(self, contact_control: "auto.ListControl") -> tuple:
        """Return the names of the currently selected list item.

        :param contact_control: list control whose children are scanned
        :return: ``(item name, name of the item's first text control)``
        :raises Exception: if no named list item is reported as selected
        """
        self._wx.SwitchToThisWindow()
        for item in contact_control.GetChildren():
            if item.ControlType != auto.ControlType.ListItemControl:
                continue
            if item.Name == "":
                continue
            if item.GetPropertyValue(auto.PropertyId.SelectionItemIsSelectedProperty):
                return item.Name, item.TextControl().Name
        raise Exception("系统错误!")

    def _call_wx(self) -> bool:
        """Check that the WeChat window exists and bring it to the front.

        :return: ``False`` (after printing a hint) if the window is not found
            within one second, ``True`` otherwise
        """
        if not self._wx.Exists(1):
            print("未检测到微信窗口,请登录微信并让微信窗口最小化或显示在桌面上。")
            return False
        # Switch to the WeChat main window.
        self._wx.SwitchToThisWindow()
        return True

    def _move_to_next_item(self) -> None:
        """Focus the WeChat window and press DOWN to select the next item."""
        self._wx.SwitchToThisWindow()
        self._wx.SendKey(key=auto.SpecialKeyNames["DOWN"], waitTime=1)

    @staticmethod
    def _build_group_record(name: str) -> dict:
        """Build the record emitted for a group chat called *name*."""
        return {"index_name": name, "nickname": name, "wxid": "", "group": 1}

    @staticmethod
    def _parse_profile_texts(texts: list) -> dict:
        """Turn the text captions of a profile pane into a friend record.

        The first caption is taken as the displayed name. Each caption equal
        to one of the labels "微信号:", "昵称:" or "地区:" assigns the
        caption that follows it to ``wxid``, ``nickname`` or ``region``.

        A label that is the last caption has no value and is ignored, an
        empty *texts* yields an empty ``index_name``, and ``wxid`` always
        exists (``""`` when the pane shows none), so later key lookups on
        the record cannot fail.

        :param texts: captions in the order they appear in the pane
        :return: dict with ``index_name``, ``nickname``, ``wxid``, ``group``
            and, when present in the pane, ``region``
        """
        index_name = texts[0] if texts else ""
        record = {"index_name": index_name, "nickname": index_name, "group": 0}
        mapping = {"微信号:": "wxid", "昵称:": "nickname", "地区:": "region"}
        details = texts[1:]
        # Pair every caption with its successor; the last caption has none.
        for label, value in zip(details, details[1:]):
            key = mapping.get(label)
            if key:
                record[key] = value
        record.setdefault("wxid", "")
        return record

    def run(self, refresh: bool = True) -> Generator:
        """Walk the WeChat contact list and yield one record per entry.

        Each record is a dict with the keys ``index_name``, ``nickname``,
        ``wxid`` and ``group`` (1 for a group chat, 0 for a friend); friend
        records also carry ``region`` when the profile pane shows it. The
        generator finishes when the newly read record has the same nickname
        and wxid as the previously yielded one, i.e. pressing DOWN no longer
        moves the selection.

        :param refresh: whether to start from the top of the contact list;
            when ``False`` the walk continues from the current selection
        :raises TypeError: if *refresh* is not a ``bool``
        """
        if not isinstance(refresh, bool):
            raise TypeError("参数refresh需要是bool类型")
        if not self._call_wx():
            return
        version = get_exe_version(self._wx.ProcessId)
        if version != "3.9.6.47":
            print("当前微信版本'{}'不匹配程序默认微信版本'3.9.6.47'".format(version))
        # Open the contacts page.
        txl_btn = self._wx.ButtonControl(Name="通讯录")
        txl_btn.Click(waitTime=1)
        # Give keyboard focus to the contact list.
        self._lxr_control.GetChildren()[2].Click(waitTime=1)
        if refresh:
            # Jump back to the top of the list.
            self._wx.SendKey(key=auto.SpecialKeyNames["HOME"], waitTime=1)
        # Last record that was yielded; only used to detect the end of the list.
        # NOTE(review): two adjacent group chats with the same name also
        # satisfy the end-of-list test below - confirm this is acceptable.
        previous_msg = None
        while True:
            if refresh:
                # The visible children change while scrolling, so both name
                # sets are re-collected on every step.
                self._add_exclusive_items(self._lxr_control)
                self._add_group_elems(self._lxr_control)
            current_item_name, current_sub_item_name = self._get_current_item_name(
                self._lxr_control)
            if current_item_name in self._exclusive_items or (
                    not refresh and current_sub_item_name != current_item_name):
                # Not a contact entry.
                print("此项不为不为微信联系人")
                self._move_to_next_item()
                continue
            if current_item_name in self._group_box:
                # Group chat.
                contact_msg = self._build_group_record(current_item_name)
            else:
                self._wx.SwitchToThisWindow()
                right_area = self._wx.ToolBarControl(
                    Name="导航").GetParentControl().GetLastChildControl()
                photo_btn = right_area.ButtonControl(Name=current_item_name)
                fxx_btn = right_area.ButtonControl(Name="发消息")
                if not refresh and not fxx_btn.Exists(1):
                    # Not a contact entry.
                    print("此项不为不为微信联系人")
                    self._move_to_next_item()
                    continue
                if not refresh and not photo_btn.Exists(1):
                    # Group chat.
                    print("此项为群聊")
                    contact_msg = self._build_group_record(current_item_name)
                else:
                    # Friend: read every text caption next to the avatar button.
                    target_area = photo_btn.GetNextSiblingControl()
                    names_list = [
                        control.Name
                        for control, depth in auto.WalkControl(target_area)
                        if control.ControlType == auto.ControlType.TextControl
                    ]
                    contact_msg = self._parse_profile_texts(names_list)
            # The name shown in the pane must match the selected list item.
            if contact_msg["index_name"] != current_item_name:
                print(
                    f'列表项名称[{current_item_name}]与页面名称[{contact_msg["index_name"]}]不匹配!')
                self._move_to_next_item()
                continue
            # Finished when nickname and wxid equal those of the previous record.
            if (previous_msg is not None
                    and contact_msg["nickname"] == previous_msg["nickname"]
                    and contact_msg["wxid"] == previous_msg["wxid"]):
                self._wx.SwitchToThisWindow()
                print("自动爬取微信联系人任务运行完成")
                return
            yield contact_msg
            previous_msg = contact_msg
            self._move_to_next_item()
def get_exe_version(pid: int):
    """
    Return the file version of the executable behind a running process.

    :param pid: PID of the process
    :return: the value reported by ``GetFileVersion`` of the
        ``Scripting.FileSystemObject`` COM object for the executable path
    """
    executable = psutil.Process(pid).exe()
    return Dispatch("Scripting.FileSystemObject").GetFileVersion(executable)
if __name__ == "__main__":
    import csv
    import uuid

    # Column order of the exported CSV file.
    header = ["index_name", "nickname", "wxid", "region", "group"]
    # A uuid1 in the file name gives every run its own output file.
    output_name = "微信联系人.{}.csv".format(uuid.uuid1())
    with open(output_name, mode="a", encoding="utf-8", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=header)
        writer.writeheader()
        spider = WxContactsSpider()
        # Echo each scraped record and write it as one CSV row.
        for row in spider.run():
            print(row)
            writer.writerow(row)
uiautomation参考链接
微软官方: https://learn.microsoft.com/zh-cn/dotnet/framework/ui-automation/using-ui-automation-for-automated-testing
GitHub: https://github.com/yinkaisheng/Python-UIAutomation-for-Windows
写在最后
- 代码仅供学习交流使用,请勿用于其他用途。
- 代码基于 PC 端微信版本 3.9.6.47 编写,其他版本的微信很可能不适用。uiautomation 版本为 2.0.18,Python 版本为 3.8。
文章编写于2024年1月