使用Python实现自动抓取微信联系人源代码

使用Python实现自动抓取微信联系人源代码

这是我基于 Python 自动化库 uiautomation 编写的、通过 PC 版微信抓取微信联系人的源代码,可供参考使用。

# coding=utf-8
import uiautomation as auto
from typing import Generator
import psutil
from win32com.client import Dispatch


class WxContactsSpider:
    """Scrape the contact list of the WeChat PC client through UI Automation.

    The spider opens the contacts page of the WeChat main window, moves
    through the contact list one entry at a time with the DOWN key and reads
    the texts shown for the selected entry.
    """

    # Version this spider expects; run() prints a warning for any other one.
    _SUPPORTED_VERSION = "3.9.6.47"
    # Labels shown on the detail pane -> key used in the yielded record.
    _LABEL_TO_FIELD = {"微信号:": "wxid", "昵称:": "nickname", "地区:": "region"}

    def __init__(self):
        self._wx = auto.WindowControl(ClassName="WeChatMainWndForPC", Name="微信", searchDepth=1)
        self._lxr_control = self._wx.ListControl(Name="联系人")
        # Names of list entries that must never be reported as contacts.
        self._exclusive_items = {"新的朋友", "公众号"}
        # Section headers of the contact list: fixed sections plus A-Z.
        self._node_box = {"星标朋友", "公众号", "群聊"}
        self._node_box.update({chr(i) for i in range(ord('A'), ord('Z') + 1)})
        # Names of list entries found under the "群聊" section.
        self._group_box = set()

    def _add_exclusive_items(self, contact_control: auto.ListControl) -> None:
        """Record list entries that are not contacts in ``_exclusive_items``.

        Two kinds of entries are recorded: entries that follow the "企业号"
        header up to the next header from ``_node_box``, and entries whose own
        ``Name`` differs from the ``Name`` of the text control found inside
        them.

        :param contact_control: the contact list control whose children are scanned
        """
        self._wx.SwitchToThisWindow()
        in_enterprise_section = False
        for item in contact_control.GetChildren():
            if item.ControlType == auto.ControlType.PaneControl:
                continue
            name = item.TextControl().Name
            if name == "企业号":
                in_enterprise_section = True
            elif in_enterprise_section:
                if name in self._node_box:
                    # Next section header reached: the header itself is not excluded.
                    in_enterprise_section = False
                    continue
                self._exclusive_items.add(item.Name)
            elif item.Name != name:
                self._exclusive_items.add(item.Name)

    def _add_group_elems(self, contact_control: auto.ListControl) -> None:
        """Record the entries listed under the "群聊" header in ``_group_box``.

        Scanning stops at the first header from ``_node_box`` that follows
        the "群聊" header.

        :param contact_control: the contact list control whose children are scanned
        """
        self._wx.SwitchToThisWindow()
        in_group_section = False
        for item in contact_control.GetChildren():
            if item.ControlType == auto.ControlType.PaneControl:
                continue
            name = item.TextControl().Name
            if name == "群聊":
                in_group_section = True
            elif in_group_section:
                if name in self._node_box:
                    break
                self._group_box.add(item.Name)

    def _get_current_item_name(self, contact_control: auto.ListControl) -> tuple:
        """Return the names of the currently selected list entry.

        :param contact_control: the contact list control whose children are scanned
        :return: ``(item.Name, item.TextControl().Name)`` of the first selected,
            non-empty-named list item
        :raises RuntimeError: when no such list item is selected
        """
        self._wx.SwitchToThisWindow()
        for item in contact_control.GetChildren():
            if item.ControlType != auto.ControlType.ListItemControl:
                continue
            if item.Name == "":
                continue
            if item.GetPropertyValue(auto.PropertyId.SelectionItemIsSelectedProperty):
                return item.Name, item.TextControl().Name
        # RuntimeError is still caught by callers that catch Exception.
        raise RuntimeError("系统错误!")

    def _call_wx(self) -> bool:
        """Bring the WeChat main window to the front if it exists.

        :return: ``False`` (after printing a hint) when ``Exists(1)`` reports
            no WeChat window, ``True`` otherwise
        """
        if not self._wx.Exists(1):
            print("未检测到微信窗口,请登录微信并让微信窗口最小化或显示在桌面上。")
            return False
        # Switch to the WeChat main window.
        self._wx.SwitchToThisWindow()
        return True

    def _select_next_item(self) -> None:
        """Focus the WeChat window and press DOWN to select the next list entry."""
        self._wx.SwitchToThisWindow()
        self._wx.SendKey(key=auto.SpecialKeyNames["DOWN"], waitTime=1)

    @staticmethod
    def _group_record(name: str) -> dict:
        """Build the record yielded for a group chat called ``name``."""
        return {"index_name": name, "nickname": name, "wxid": "", "group": 1}

    @classmethod
    def _parse_detail_texts(cls, texts: list) -> dict:
        """Turn the texts read from the detail pane into a friend record.

        The first text is used as ``index_name`` and as the default
        ``nickname``; every known label is followed by its value.

        :param texts: names of the text controls, in walk order
        :return: record with ``index_name``, ``nickname``, ``wxid`` and
            ``group`` always present, plus ``region`` when it was found
        """
        # An empty pane yields an empty index_name, which run() then reports
        # as a mismatch instead of crashing on a missing first element.
        index_name = texts[0] if texts else ""
        # "wxid" always gets a default: run() compares it to detect the end
        # of the list, and a page without a "微信号:" label must not raise.
        record = {"index_name": index_name, "nickname": index_name,
                  "wxid": "", "group": 0}
        remaining = texts[1:]
        # Pairing each text with its successor means a label that happens to
        # be the very last text is ignored instead of indexing out of range.
        for label, value in zip(remaining, remaining[1:]):
            field = cls._LABEL_TO_FIELD.get(label)
            if field:
                record[field] = value
        return record

    def _read_detail_pane(self, item_name: str, refresh: bool):
        """Read the record of the selected entry from the right-hand area.

        :param item_name: ``Name`` of the selected list entry
        :param refresh: the ``refresh`` flag passed to :meth:`run`
        :return: a friend or group record, or ``None`` when the entry is not
            a contact (only detected when ``refresh`` is ``False``)
        """
        self._wx.SwitchToThisWindow()
        # The last child of the parent of the "导航" toolbar holds the details.
        right_area = self._wx.ToolBarControl(
            Name="导航").GetParentControl().GetLastChildControl()
        photo_btn = right_area.ButtonControl(Name=item_name)
        send_msg_btn = right_area.ButtonControl(Name="发消息")
        if not refresh:
            # Without the pre-built exclusion/group sets the pane itself is
            # used to classify the entry.
            if not send_msg_btn.Exists(1):
                return None
            if not photo_btn.Exists(1):
                print("此项为群聊")
                return self._group_record(item_name)
        texts = [
            control.Name
            for control, _depth in auto.WalkControl(photo_btn.GetNextSiblingControl())
            if control.ControlType == auto.ControlType.TextControl
        ]
        return self._parse_detail_texts(texts)

    def run(self, refresh: bool = True) -> Generator:
        """Crawl the WeChat contact list and yield one record per contact.

        Each record is a dict with the keys ``index_name``, ``nickname``,
        ``wxid`` and ``group`` (``1`` for group chats, ``0`` for friends);
        friend records may also carry ``region``. Crawling stops when two
        consecutive records have the same ``nickname`` and ``wxid``.

        This is a generator function, so nothing (including the argument
        check) runs before the first item is requested.

        :param refresh: when ``True`` the HOME key is sent first and the
            exclusion/group sets are rebuilt on every step; when ``False``
            crawling continues from the current selection
        :raises TypeError: when ``refresh`` is not a ``bool``
        """
        if not isinstance(refresh, bool):
            raise TypeError("参数refresh需要是bool类型")
        if not self._call_wx():
            return
        version = get_exe_version(self._wx.ProcessId)
        if version != self._SUPPORTED_VERSION:
            print("当前微信版本'{}'不匹配程序默认微信版本'{}'".format(
                version, self._SUPPORTED_VERSION))
        # Switch to the contacts page.
        self._wx.ButtonControl(Name="通讯录").Click(waitTime=1)
        # Give keyboard focus to the contact list.
        self._lxr_control.GetChildren()[2].Click(waitTime=1)
        if refresh:
            self._wx.SendKey(key=auto.SpecialKeyNames["HOME"], waitTime=1)
        # Only the previously yielded record is needed for the end-of-list
        # check, so the full history is not kept in memory.
        previous_msg = None
        while True:
            if refresh:
                self._add_exclusive_items(self._lxr_control)
                self._add_group_elems(self._lxr_control)
            current_item_name, current_sub_item_name = self._get_current_item_name(
                self._lxr_control)
            not_a_contact = current_item_name in self._exclusive_items or (
                not refresh and current_sub_item_name != current_item_name)
            if not_a_contact:
                print("此项不为不为微信联系人")
                self._select_next_item()
                continue
            if current_item_name in self._group_box:
                contact_msg = self._group_record(current_item_name)
            else:
                contact_msg = self._read_detail_pane(current_item_name, refresh)
                if contact_msg is None:
                    print("此项不为不为微信联系人")
                    self._select_next_item()
                    continue
            # The detail pane must describe the entry selected in the list.
            if contact_msg["index_name"] != current_item_name:
                print(
                    f'列表项名称[{current_item_name}]与页面名称[{contact_msg["index_name"]}]不匹配!')
                self._select_next_item()
                continue
            # Pressing DOWN on the last entry keeps it selected, so seeing
            # the same nickname and wxid twice in a row ends the crawl.
            if (previous_msg is not None
                    and contact_msg["nickname"] == previous_msg["nickname"]
                    and contact_msg["wxid"] == previous_msg["wxid"]):
                self._wx.SwitchToThisWindow()
                print("自动爬取微信联系人任务运行完成")
                return
            yield contact_msg
            previous_msg = contact_msg
            self._select_next_item()


def get_exe_version(pid: int):
    """
    Return the file version of the executable that runs as process ``pid``.

    :param pid: PID of the process to inspect
    :return: the value reported by ``GetFileVersion`` of the
        ``Scripting.FileSystemObject`` COM object for the executable path
    """
    executable = psutil.Process(pid).exe()
    file_system = Dispatch("Scripting.FileSystemObject")
    return file_system.GetFileVersion(executable)


if __name__ == "__main__":
    import csv
    import uuid

    # Column order of the CSV file written below.
    columns = ["index_name", "nickname", "wxid", "region", "group"]
    # A uuid1 in the file name gives every run its own output file.
    output_name = "微信联系人.{}.csv".format(uuid.uuid1())
    with open(output_name, mode="a", encoding="utf-8", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        # Rows are written as they are scraped, one per yielded contact.
        for contact in WxContactsSpider().run():
            print(contact)
            writer.writerow(contact)

uiautomation参考链接

微软官方: https://learn.microsoft.com/zh-cn/dotnet/framework/ui-automation/using-ui-automation-for-automated-testing

GitHub: https://github.com/yinkaisheng/Python-UIAutomation-for-Windows

写在最后

  1. 代码仅供学习交流使用,请勿用于其他用途。
  2. 代码基于PC端微信3.9.6.47版本编写,其他版本的微信很可能不适用。
  3. uiautomation版本为2.0.18
  4. python版本为3.8

文章编写于2024年1月

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值