R024---UiPath之Execute Query的TimeOut属性设置引起的程序错误及解决

当Execute Query活动在RPA流程中运行超过默认30秒时,会导致程序错误。通过设置Timeout属性为500000毫秒,确保活动有足够时间执行,避免因超时引发的中断。

​一、缘起

昨天遇到一个关于Execute Query的奇怪问题,今天解决了,做个记录,供参考。

二、问题

程序执行到Execute Query这一步时(其实这一步是循环中的一个步骤,已经跑了11次,跑第12次时报错),报错信息:字典引用的key不存在。

Execute Query的SQL语句的拼接中,确实用到字典变量。

于是,在这一步前增加Write Line,把Execute Query中拼接SQL语句的语句,通过Write Line输出,结果发现这个SQL语句拼接正常,没有因为用到字段变量而报错,那为什么执行到Execute Query就报上面的关于字典的错误?

三、解决

3.1 新建流程文件,将Execute Query这一步单独复制出来执行,报错:

实际上我并没有主动取消该流程。

3.2 在navicat中执行该SQL语句,结果正常,用时72秒。

      猜想:是不是因为运行时间太长超时导致。

      于是找到Execute Query的Timeout属性,属性提示:Specifies the amount of time (in milliseconds) to wait for the activity to run before an error is thrown. The defau

import time import random import requests import json import logging import socket from pathlib import Path from openpyxl import load_workbook from selenium import webdriver from selenium.webdriver.edge.service import Service from selenium.webdriver.edge.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.action_chains import ActionChains from selenium.common.exceptions import TimeoutException, WebDriverException from fake_useragent import UserAgent, FakeUserAgentError # -------------------------- 新增:手动指定Edge浏览器驱动路径 -------------------------- EDGE_BINARY_PATH = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe" # 浏览器路径 EDGE_DRIVER_PATH = r"C:\Users\27570\Desktop\edgedriver_win32\msedgedriver.exe" # 驱动路径,需下载并指定 # ------------------------------------------------------------------------------------- # 配置日志 logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler("ip_query.log"), logging.StreamHandler() ] ) # -------------------------- 优化:整合网络连接检查 -------------------------- def check_internet_connection(): """检查网络连接是否正常,尝试多种连接方式提高可靠性""" try: # 尝试连接到Google的公共DNS服务器 socket.create_connection(("8.8.8.8", 53), timeout=5) logging.info("网络连接测试通过 (DNS)") return True except OSError: logging.warning("无法连接到DNS服务器,尝试HTTP请求...") try: # 尝试HTTP请求到百度 response = requests.get("https://www.baidu.com", timeout=5) if response.status_code == 200: logging.info("网络连接测试通过 (HTTP)") return True except requests.RequestException: logging.warning("HTTP请求失败,尝试HTTPS请求...") try: # 尝试HTTPS请求到百度 response = requests.get("https://www.baidu.com", timeout=5) if response.status_code == 200: logging.info("网络连接测试通过 (HTTPS)") return True except requests.RequestException: logging.error("HTTPS请求失败") return False # ------------------------------------------------------------------------------------- def build_query_url(base_url, ip_address, path_format="{ip}/", use_params=True, param_name="ip"): """构建IP查询URL,支持路径参数查询参数两种格式""" if not base_url.startswith(('http://', 'https://')): base_url = 'https://' + base_url base_url = base_url.rstrip('/') try: if '.' in ip_address: # IPv4 socket.inet_pton(socket.AF_INET, ip_address) elif ':' in ip_address: # IPv6 socket.inet_pton(socket.AF_INET6, ip_address) else: raise ValueError("无效的IP地址") if ':' in ip_address: ip_address = f"[{ip_address}]" if use_params: # 使用查询参数的方式构造URL from urllib.parse import urlencode # 添加固定参数action=2 params = {param_name: ip_address, "action": 2} return f"{base_url}?{urlencode(params)}" else: # 原有的路径参数方式 from urllib.parse import quote encoded_ip = quote(ip_address) return f"{base_url}/{path_format.format(ip=encoded_ip)}" except socket.error: logging.error(f"无效的IP地址格式: {ip_address}") return None except Exception as e: logging.error(f"构建查询URL时出错: {str(e)}") return None # 手动配置的固定请求头 MANUAL_HEADERS = { 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'accept-encoding': 'gzip, deflate, br, zstd', 'accept-language': 'zh-CN,zh;q=0.9', 'cache-control': 'max-age=0', 'connection': 'keep-alive', 'cookie': '_c_WBKFRo=NWgPw1zeBaW3I2CtOcadfhJJw33TcEYmWMtyGzTE; Hm_lvt_f4f76646cd877e538aa1fbbdf351c548=1753560343,1753617545,1753793389,1753862286; HMACCOUNT=96B6BD9DE68EFF3B; PHPSESSID=o9fnnscr7sofru4b8r1khlde3f; Hm_lvt_f4f76646cd877e538aa1fbbdf351c548=1754123598; HMACCOUNT=96B6BD9DE68EFF3B; Hm_lpvt_f4f76646cd877e538aa1fbbdf351c548=1754611428; Hm_lpvt_f4f76646cd877e538aa1fbbdf351c548=1754613370', 'host': 'www.ip138.com', 'referer': 'https://www.ip138.com/iplookup.php?ip=27.154.214.154&action=2', 'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Microsoft Edge";v="138"', 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Windows"', 'sec-fetch-dest': 'document', 'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'same-origin', 'sec-fetch-user': '?1', 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0' } def configure_driver(max_retries=5): """配置Edge浏览器驱动""" for attempt in range(max_retries): try: # 先检查网络连接 if not check_internet_connection(): raise Exception("网络连接不可用") # 创建Edge选项 edge_options = Options() # 1. 基础配置 edge_options.binary_location = EDGE_BINARY_PATH # 手动指定浏览器路径 edge_options.add_argument("--disable-blink-features=AutomationControlled") # 核心反检测 edge_options.add_experimental_option("excludeSwitches", ["enable-automation"]) edge_options.add_experimental_option("useAutomationExtension", False) edge_options.add_argument("--start-maximized") # 最大化窗口 # 2. 增强反检测 edge_options.add_argument("--disable-extensions") edge_options.add_argument("--disable-plugins-discovery") edge_options.add_argument("--disable-web-security") # 3. 随机化配置 features_to_disable = [ "AutomationControlled", "InterestCohort", "BlinkGenPropertyTrees" ] edge_options.add_argument(f"--disable-features={','.join(random.sample(features_to_disable, random.randint(2, 4)))}") screen_sizes = [(1366, 768), (1920, 1080), (1536, 864)] width, height = random.choice(screen_sizes) edge_options.add_argument(f"--window-size={width},{height}") if random.random() > 0.5: edge_options.add_argument("--disable-gpu") else: edge_options.add_argument("--enable-gpu-rasterization") # 4. 资源加载控制 prefs = { "profile.managed_default_content_settings.images": 2, "profile.managed_default_content_settings.stylesheets": 2, } edge_options.add_experimental_option("prefs", prefs) edge_options.page_load_strategy = 'eager' # 只等待DOM加载 # 5. 使用手动指定的驱动路径 try: service = Service(EDGE_DRIVER_PATH) # 手动指定驱动路径 logging.info(f"使用手动指定的驱动路径: {EDGE_DRIVER_PATH}") except Exception as e: logging.error(f"驱动路径配置错误: {str(e)}") raise # 6. 创建浏览器实例 driver = webdriver.Edge(service=service, options=edge_options) # 7. 隐藏自动化特征 driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { "source": """ // 核心:隐藏webdriver标志 Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); // 模拟Chrome特征 window.chrome = { runtime: {} }; // 模拟时区 Intl.DateTimeFormat().resolvedOptions().timeZone = ['Asia/Shanghai', 'Asia/Beijing'][Math.floor(Math.random() * 2)]; """ }) # 8. 设置超时 driver.set_page_load_timeout(30) driver.set_script_timeout(30) # 9. 应用手动配置的请求头 logging.info(f"应用手动配置的请求头: {json.dumps(MANUAL_HEADERS, indent=2)[:100]}...") driver.execute_cdp_cmd("Network.setUserAgentOverride", { "userAgent": MANUAL_HEADERS["user-agent"], "accept": MANUAL_HEADERS["accept"], "acceptLanguage": MANUAL_HEADERS["accept-language"], }) logging.info(f"浏览器驱动初始化成功 (尝试 {attempt+1}/{max_retries})") return driver except Exception as e: logging.error(f"配置浏览器驱动失败 (尝试 {attempt+1}/{max_retries}): {str(e)}") if attempt < max_retries - 1: wait_time = 2 ** attempt + random.uniform(5, 10) logging.info(f"将在 {wait_time:.2f} 秒后重试") time.sleep(wait_time) logging.critical("达到最大重试次数,无法初始化浏览器驱动") return None def change_user_agent(driver): """更换为手动配置的请求头""" logging.info(f"应用手动配置的请求头: {json.dumps(MANUAL_HEADERS, indent=2)[:100]}...") driver.execute_cdp_cmd("Network.setUserAgentOverride", { "userAgent": MANUAL_HEADERS["user-agent"], "accept": MANUAL_HEADERS["accept"], "acceptLanguage": MANUAL_HEADERS["accept-language"], }) driver.refresh() time.sleep(random.uniform(2, 4)) def handle_cookies(driver): """处理保存Cookie""" cookies = driver.get_cookies() logging.info(f"获取到 {len(cookies)} 个Cookie") return cookies def is_banned(driver): """检测是否被封禁""" try: banned_xpaths = [ '//div[contains(text(), "访问被阻止")]', '//div[contains(text(), "验证码")]', '//div[contains(text(), "您的IP已被封禁")]', ] for xpath in banned_xpaths: if WebDriverWait(driver, 5).until( EC.presence_of_element_located((By.XPATH, xpath)) ): logging.warning("检测到封禁页面") return True return False except: return False def check_dynamic_element(driver, xpaths): """检查网页上是否存在任一动态XPath的元素""" for i, xpath in enumerate(xpaths, 1): try: WebDriverWait(driver, 5).until( EC.presence_of_element_located((By.XPATH, xpath)) ) logging.info(f"使用动态元素XPath {i}: {xpath}") return True except: continue return False def get_result_element(driver, xpaths): """尝试获取任一结果元素""" for i, xpath in enumerate(xpaths, 1): try: element = WebDriverWait(driver, 10).until( EC.presence_of_element_located((By.XPATH, xpath)) ) logging.info(f"使用结果元素XPath {i}: {xpath}") return element except: continue return None def simulate_human_behavior(driver): """模拟人类浏览行为""" try: # 随机滚动页面 scroll_height = driver.execute_script("return document.body.scrollHeight") scroll_steps = random.randint(3, 7) for i in range(scroll_steps): scroll_to = int(scroll_height * (i + 1) / scroll_steps) driver.execute_script(f"window.scrollTo(0, {scroll_to})") time.sleep(random.uniform(0.5, 1.5)) # 随机移动鼠标 actions = ActionChains(driver) elements = driver.find_elements(By.TAG_NAME, "a") if elements: for _ in range(random.randint(1, 3)): element = random.choice(elements) actions.move_to_element(element).perform() time.sleep(random.uniform(0.3, 0.8)) except Exception as e: logging.warning(f"模拟人类行为时出错: {e}") def query_ip(driver, ip_address, base_url, xpath_expressions, dynamic_xpaths, max_retries=5): """查询IP信息,添加封禁检测处理""" if not ip_address or not isinstance(ip_address, str) or ip_address.strip() == "": logging.warning(f"无效的IP地址: {ip_address}") return "无效IP" ip_address = ip_address.strip() for attempt in range(max_retries): try: if attempt > 0: wait_time = 2 ** attempt + random.uniform(3, 7) logging.info(f"第 {attempt+1} 次重试前等待 {wait_time:.2f} 秒...") time.sleep(wait_time) change_user_agent(driver) query_url = build_query_url(base_url, ip_address) if not query_url: logging.error(f"无法构建有效的查询URL,IP: {ip_address}") return "无效URL" logging.info(f"访问查询URL (尝试 {attempt+1}/{max_retries}): {query_url}") try: driver.get(query_url) time.sleep(random.uniform(8, 15)) if is_banned(driver): logging.warning(f"IP {ip_address} 查询时被封禁") driver.quit() time.sleep(5) driver = configure_driver() time.sleep(5) continue current_url = driver.current_url if current_url == "data:," or "about:blank" in current_url: raise Exception("浏览器加载了空白页面") simulate_human_behavior(driver) time.sleep(random.uniform(2, 5)) handle_cookies(driver) except TimeoutException: logging.warning(f"页面加载超时,尝试重新加载") driver.refresh() time.sleep(15) continue if check_dynamic_element(driver, dynamic_xpaths): logging.info(f"检测到动态元素,结果将设为'动态'") return "动态" result_element = get_result_element(driver, xpath_expressions) if result_element: return result_element.text.strip() else: raise Exception("无法找到结果元素") except WebDriverException as e: logging.error(f"WebDriver错误 (尝试 {attempt+1}/{max_retries}): {str(e)}") if "ERR_EMPTY_RESPONSE" in str(e) or "ERR_CONNECTION_RESET" in str(e): logging.warning("检测到连接错误,尝试重启浏览器...") driver.quit() time.sleep(15) driver = configure_driver() time.sleep(10) else: time.sleep(2 ** attempt + random.uniform(5, 10)) continue except Exception as e: logging.error(f"查询IP {ip_address} 失败 (尝试 {attempt+1}/{max_retries}): {str(e)}") time.sleep(2 ** attempt + random.uniform(5, 10)) continue logging.error(f"IP {ip_address} 查询失败,已达到最大重试次数") driver.save_screenshot(f"error_{ip_address}.png") return "查询失败" def is_row_hidden(worksheet, row_idx): """检查Excel行是否被隐藏""" return worksheet.row_dimensions[row_idx].hidden def process_excel(input_file, base_url, xpath_expressions, dynamic_xpaths, ip_column='A', result_column='I', start_row=2): """处理Excel文件""" wb = load_workbook(input_file) ws = wb.active has_filter = ws.auto_filter.ref is not None logging.info(f"检测到筛选: {has_filter}") driver = configure_driver() if not driver: logging.critical("无法初始化浏览器驱动,退出程序") return visible_rows = [] for row_idx in range(start_row, ws.max_row + 1): row_dim = ws.row_dimensions.get(row_idx) if not row_dim or not row_dim.hidden: visible_rows.append(row_idx) logging.info(f"可见行共{len(visible_rows)}行") total_visible = len(visible_rows) processed_count = 0 try: for i, row in enumerate(visible_rows, 1): ip_address = ws[f"{ip_column}{row}"].value if not ip_address: logging.info(f"第 {row} 行IP地址为空,跳过") continue logging.info(f"正在查询IP: {ip_address} ({i}/{total_visible})") result = query_ip(driver, ip_address, base_url, xpath_expressions, dynamic_xpaths) ws[f"{result_column}{row}"] = result processed_count += 1 if i % 3 == 0 or i == total_visible: wb.save(input_file) logging.info(f"已保存进度: {i}/{total_visible} 到 {input_file}") wait_time = random.uniform(20,40) logging.info(f"等待 {wait_time:.2f} 秒后继续...") time.sleep(wait_time) if i % 10 == 0: extra_wait = random.uniform(40,60) logging.info(f"已处理 {i} 个IP,额外休息 {extra_wait:.2f} 秒...") time.sleep(extra_wait) if i % 20 == 0: logging.info(f"已处理 {i} 个IP,重启浏览器以避免被检测...") driver.quit() time.sleep(15) driver = configure_driver() if not driver: logging.critical("无法重新初始化浏览器驱动,退出程序") return except Exception as e: logging.critical(f"处理过程中发生意外错误: {str(e)}") finally: if driver: driver.quit() wb.save(input_file) logging.info(f"已保存最终结果到 {input_file}") logging.info(f"处理完成!共处理 {processed_count}/{total_visible} 个可见IP地址") if __name__ == "__main__": INPUT_FILE = r"C:\Users\27570\Desktop\飞塔-福建-简版-更新版20250730.xlsx" # 修改为新的基础URL BASE_URL = "https://www.ip138.com/iplookup.php" # 配置两种XPath表达式 XPATH_EXPRESSIONS = [ '/html/body/div/div[2]/div[1]/div/table/tbody/tr[2]/td[2]', '/html/body/div/div[2]/div[2]/div/div[2]/div[1]/div/div[2]/div[2]/div[2]/table/tbody/tr[2]/td[2]' ] DYNAMIC_XPATHS = [ '/html/body/div/div[2]/div[1]/div/p/a', '/html/body/div/div[2]/div[2]/div/div[2]/div[1]/div/div[2]/div[2]/div[1]/p[1]/a' ] max_main_retries = 3 for main_attempt in range(max_main_retries): try: logging.info(f"开始处理Excel文件 (尝试 {main_attempt+1}/{max_main_retries})") process_excel(INPUT_FILE, BASE_URL, XPATH_EXPRESSIONS, DYNAMIC_XPATHS) break except Exception as e: logging.critical(f"主程序执行失败 (尝试 {main_attempt+1}/{max_main_retries}): {str(e)}") if main_attempt < max_main_retries - 1: wait_time = 10 + random.uniform(10, 30) logging.info(f"将在 {wait_time:.2f} 秒后重试") time.sleep(wait_time) else: logging.critical("达到最大重试次数,程序终止") 以上代码在运行时出现了以下问题,解决问题并给我完整代码 Message: session not created: probably user data directory is already in use, please specify a unique value for --user-data-dir argument, or don't use --user-data-dir; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception Stacktrace: GetHandleVerifier [0x0xf6d593+37219] (No symbol) [0x0xe1a716] (No symbol) [0x0xbe51ce] (No symbol) [0x0xc0d881] (No symbol) [0x0xc08a43] (No symbol) [0x0xc3d78b] (No symbol) [0x0xc3d22a] (No symbol) [0x0xc324f6] (No symbol) [0x0xc13327] (No symbol) [0x0xc12723] (No symbol) [0x0xc13144] sqlite3_dbdata_init [0x0x105a89c+518364] sqlite3_dbdata_init [0x0x1141ab0+1465072] sqlite3_dbdata_init [0x0x11413e5+1463333] sqlite3_dbdata_init [0x0x11328ec+1403180] sqlite3_dbdata_init [0x0x11422d2+1467154] (No symbol) [0x0xe31d9d] (No symbol) [0x0xe25108] (No symbol) [0x0xe252fb] (No symbol) [0x0xe0a649] BaseThreadInitThunk [0x0x75525d49+25] RtlInitializeExceptionChain [0x0x7706d1ab+107] RtlGetAppContainerNamedObjectPath [0x0x7706d131+561]
最新发布
08-09
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值