Browser queue wait time

本文深入探讨了浏览器队列等待时间的原因,包括JavaScript文件加载顺序和并发请求限制,并提供策略来优化这些因素,从而提高网页加载速度。通过将图像文件托管到单独的子域,可以有效避免并发请求限制的影响。

There can be multiple reasons for a request to wait in the browser queue before it is sent to the server. The two most common reasons are:


    • If there are multiple JavaScript files on a web page, the browser loads them one after another rather than simultaneously, in the same order in which they appear on the page. A web developer should always consider whether the multiple JavaScript files on a page can be combined into a single JavaScript file.


      • Each browser has a limit on the number of concurrent requests that a web page can make to a single domain. For example, this limit is six for Firefox 3. If there are eight images on a web page that have to be loaded from the same domain, six requests will be made simultaneously, and the requests for the remaining two images have to wait until a response to one of the earlier requests is received from the server.

 


When optimizing the performance of a web application from the browser's perspective, browser queue wait time is a very important consideration.


So how can the browser's limit on the maximum number of concurrent requests be bypassed?


If, for whatever reason, a web page needs to make a lot of requests to the server to get images, CSS files, AJAX responses, and so on, then one common technique is to host the image files on a separate subdomain. For example, host all the images for the Packt site on images.packtpub.com instead of the www.packtpub.com subdomain. However, it is important to keep in mind that every subdomain that is added also requires a DNS lookup. Based on a study by Yahoo, having two to four subdomains for a site is a good compromise between parallel requests and DNS lookups.

# Request that accompanied this script in the original post
# ("不使用递归,使用while,修改代码"): modify the code to use a while loop
# instead of recursion.
import logging
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.edge.options import Options as EdgeOptions
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.edge.service import Service as EdgeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, ElementClickInterceptedException

# Logging configuration: INFO level, timestamped messages on the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)


class BrowserConfig:
    """Browser selection, launch options and driver creation.

    Args:
        browser_type: 'chrome', 'edge' or 'firefox' (case-insensitive).
        headless: when True, add the browser-specific headless arguments.
        driver_path: optional path to the driver executable; when omitted the
            built-in default for the browser type (only defined for Edge) is used.

    Raises:
        ValueError: if ``browser_type`` is not one of the supported browsers.
    """

    def __init__(self, browser_type='edge', headless=False, driver_path=None):
        self.browser_type = browser_type.lower()
        self.headless = headless
        self._custom_driver_path = driver_path
        self.options = self._setup_options()
        self.driver_path = self._get_driver_path()

    def _setup_options(self):
        """Build the options object for the selected browser only."""
        options_map = {
            'chrome': ChromeOptions,
            'edge': EdgeOptions,
            'firefox': FirefoxOptions
        }
        if self.browser_type not in options_map:
            raise ValueError(f"不支持的浏览器: {self.browser_type}")

        options = options_map[self.browser_type]()
        if self.headless:
            headless_args = {
                'chrome': ['--headless=new', '--disable-gpu'],
                'edge': ['--headless=chrome'],
                'firefox': ['--headless']
            }
            for arg in headless_args[self.browser_type]:
                options.add_argument(arg)
        return options

    def _get_driver_path(self):
        """Return the explicit driver path, else the default for this browser (or None)."""
        if self._custom_driver_path:
            return self._custom_driver_path
        return {
            'edge': r"C:\own\app\python_code\work\入职培训\打印点击\msedgedriver.exe"
        }.get(self.browser_type)

    def get_driver(self):
        """Create and return a WebDriver instance for the configured browser.

        Raises:
            ValueError: if Edge is selected without a driver path, or the
                browser type is unsupported.
        """
        if self.browser_type == 'chrome':
            return webdriver.Chrome(options=self.options)
        elif self.browser_type == 'edge':
            if not self.driver_path:
                raise ValueError("需要Edge驱动路径")
            return webdriver.Edge(service=EdgeService(self.driver_path), options=self.options)
        elif self.browser_type == 'firefox':
            return webdriver.Firefox(options=self.options)
        raise ValueError(f"不支持的浏览器: {self.browser_type}")


class ElementActions:
    """Low-level element helpers (scroll, click, highlight, classify) bound to one driver."""

    def __init__(self, driver):
        self.driver = driver
        # Identity strings of input fields that handle_inputs() has already processed.
        self.processed_inputs = set()

    def scroll_to(self, element):
        """Scroll ``element`` to the vertical centre of the viewport; failures are only logged."""
        try:
            self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
            time.sleep(0.1)
        except Exception as e:
            logger.warning(f"滚动失败: {e}")

    def safe_click(self, element):
        """Click ``element``; fall back to a JavaScript click when the click is intercepted."""
        try:
            element.click()
        except ElementClickInterceptedException:
            self.driver.execute_script("arguments[0].click();", element)

    def wait_for(self, by, locator, timeout=10):
        """Wait up to ``timeout`` seconds for the element to be present and return it."""
        return WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((by, locator)))

    def handle_alert(self):
        """Accept an alert if one appears within 3 seconds; otherwise do nothing."""
        try:
            WebDriverWait(self.driver, 3).until(EC.alert_is_present()).accept()
        except TimeoutException:
            pass

    def close_new_windows(self, original_window):
        """Close every window except ``original_window`` and switch back to it."""
        for window in self.driver.window_handles:
            if window != original_window:
                self.driver.switch_to.window(window)
                self.driver.close()
        self.driver.switch_to.window(original_window)

    def get_element_id(self, element):
        """Return a string identity for ``element`` built from tag, class, value, text and id.

        Falls back to ``str(hash(element))`` when any attribute cannot be read.
        """
        try:
            # The original read the misspelled attribute 'vaule', which never exists.
            return (
                f"{element.tag_name}__{element.get_attribute('class')}__"
                f"{element.get_attribute('value')}__{element.text[:30]}__"
                f"{element.get_attribute('id')}"
            )
        except Exception:
            return str(hash(element))

    def highlight(self, element, duration=1.5):
        """Temporarily give ``element`` a red border and yellow background, then restore its style."""
        original_style = element.get_attribute("style") or ""
        highlighted_style = f"border: 2px solid red; background-color: yellow; {original_style}"
        # Pass the style as a script argument rather than splicing it into the
        # JavaScript source, so quotes inside the existing style cannot break it.
        set_style_script = "arguments[0].setAttribute('style', arguments[1]);"
        self.driver.execute_script(set_style_script, element, highlighted_style)
        time.sleep(duration)
        self.driver.execute_script(set_style_script, element, original_style)

    def is_sensitive(self, element):
        """Return True when ``element`` should not be clicked automatically.

        An element is sensitive when its text, value, href, id or class contains
        one of the sensitive words, when its href points at solutions.brother.com,
        or when its text contains both "Brother" and "Solutions". Any error while
        inspecting the element yields False.
        """
        try:
            href = element.get_attribute("href")
            attrs = [element.text, element.get_attribute("value"), href,
                     element.get_attribute("id"), element.get_attribute("class")]
            sensitive_words = ["提交", "导出", "下载", "删除", "清空", "登出", "广告", "logout", "submit",
                               "download", "delete", "clear", "Download", "OK", "エクスポート"]
            for attr in attrs:
                if attr and any(word in attr for word in sensitive_words):
                    return True

            # Skip the help button on 10.244.4.37.
            if href and "solutions.brother.com" in href:
                return True
            if "Brother" in element.text and "Solutions" in element.text:
                return True
        except Exception as e:
            logger.debug(f"敏感元素检查失败: {e}")
        return False

    def get_clickable(self):
        """Return all elements on the current page that match the clickable-element selector."""
        return self.driver.find_elements(
            By.CSS_SELECTOR,
            'button, a, input[type="button"], input[type="submit"], input[type="radio"], input[type="checkbox"], '
            'div[role="button"], [role="menuitem"], [role="option"], [data-testid*="button"], [class*="btn"], '
            '[class*="clickable"], select'
        )

    def handle_inputs(self):
        """Highlight each unprocessed visible text input and report its current value.

        Best effort: finding no inputs within 5 seconds, or any other failure
        while processing them, is not propagated to the caller.
        """
        try:
            inputs = WebDriverWait(self.driver, 5).until(
                EC.presence_of_all_elements_located(
                    (By.CSS_SELECTOR, 'input[type="text"], input[type="password"], textarea'))
            )

            for idx, elem in enumerate(inputs):
                elem_id = self.get_element_id(elem)
                if elem_id in self.processed_inputs or not (elem.is_displayed() and elem.is_enabled()):
                    continue

                self.highlight(elem)
                logger.info(f"请在输入框 {idx + 1} 中输入内容")
                entered_value = elem.get_attribute("value")
                print(f"您在输入框 {idx + 1} 中输入的内容为: {entered_value}")
                time.sleep(1)
                self.processed_inputs.add(elem_id)
        except TimeoutException:
            # No input fields appeared on this page.
            pass
        except Exception as e:
            logger.debug(f"处理输入框失败: {e}")


class AutomationWorkflow:
    """Log in, click through navigation and clickable elements page by page, then log out.

    Args:
        browser_config: object whose ``get_driver()`` returns the WebDriver to use.
        login_url: page opened right after the browser starts (optional).
        max_depth: maximum recursion depth for page traversal.
        max_loop_count: maximum total number of click_all/click_navs invocations.
    """

    def __init__(self, browser_config, login_url=None, max_depth=5, max_loop_count=100):
        self.browser_config = browser_config
        self.login_url = login_url
        self.driver = browser_config.get_driver()
        self.elem = ElementActions(self.driver)
        self.max_depth = max_depth  # maximum recursion depth
        self.max_loop_count = max_loop_count  # maximum number of traversal calls
        self.loop_counter = 0  # traversal calls made so far
        self.visit_history = []  # most recently visited URLs, used for loop detection
        self.history_limit = 10  # maximum length of visit_history

        if login_url:
            self.driver.get(login_url)
            self._add_to_history(login_url)
        logger.info(f"{self.browser_config.browser_type} 浏览器启动成功")

    def _add_to_history(self, url):
        """Append ``url`` to the visit history, keeping at most ``history_limit`` entries."""
        self.visit_history.append(url)
        if len(self.visit_history) > self.history_limit:
            self.visit_history.pop(0)

    def _is_loop_detected(self):
        """Return True when the full history window holds fewer distinct URLs than half its size."""
        if len(self.visit_history) < self.history_limit:
            return False
        # Few distinct URLs in a full window means the same pages keep repeating.
        return len(set(self.visit_history)) < self.history_limit / 2

    def login(self, password):
        """Type ``password`` into the field named 'B1bc' and click 'login'; return success as bool."""
        try:
            self.elem.wait_for(By.NAME, 'B1bc').send_keys(password)
            self.elem.safe_click(self.driver.find_element(By.ID, 'login'))
            time.sleep(1)
            return True
        except Exception as e:
            logger.error(f"登录失败: {e}")
            return False

    def logout(self):
        """Click the element with id 'logout'; failures are only logged."""
        try:
            self.elem.safe_click(self.driver.find_element(By.ID, 'logout'))
            logger.info("登出成功")
            time.sleep(1)
        except Exception as e:
            logger.error(f"登出失败: {e}")

    def skip_url(self, current_url):
        """Navigate back and return True when ``current_url`` is on the skip list."""
        skip_list = ["http://10.244.4.37/admin/password.html",
                     "http://10.244.4.37/net/net/net.html",
                     "http://10.244.4.37/general/find.html"]
        if current_url in skip_list:
            self.driver.back()
            return True
        return False

    def is_nav_element(self, element):
        """Return True when any ancestor of ``element`` looks like a navigation container.

        An ancestor qualifies when it is a <nav> tag, its class contains
        'navbar', 'navigation' or 'menu', or its id contains 'nav' or 'menu'.
        Reaching <html>, or any lookup error, yields False.
        """
        parent = element
        while True:
            try:
                parent = parent.find_element(By.XPATH, '..')
                tag_name = parent.tag_name.lower()
                if tag_name == 'html':
                    return False

                # Guard against a None attribute value so that an ancestor
                # without class/id does not abort the walk up the tree.
                class_name = (parent.get_attribute('class') or '').lower()
                id_name = (parent.get_attribute('id') or '').lower()

                if tag_name == 'nav' or \
                        any(kw in class_name for kw in ['navbar', 'navigation', 'menu']) or \
                        any(kw in id_name for kw in ['nav', 'menu']):
                    return True
            except Exception:
                return False

    def get_nav_elements(self):
        """Return the clickable elements that sit inside a navigation container."""
        return [e for e in self.elem.get_clickable() if self.is_nav_element(e)]

    def click_navs(self, clicked, depth=0):
        """Click every not-yet-clicked navigation element and traverse the page it opens.

        Args:
            clicked: shared set of element identities already handled; updated in place.
            depth: current recursion depth.

        Stops early on reaching max_depth, max_loop_count, or a detected URL loop.
        """
        if depth > self.max_depth:
            logger.info(f"点击导航达到最大递归深度 {self.max_depth},停止点击导航栏")
            return

        # Count this call against the global loop budget.
        self.loop_counter += 1
        if self.loop_counter >= self.max_loop_count:
            logger.warning(f"点击导航达到最大循环次数 {self.max_loop_count},停止操作")
            return

        # Detect cyclic URL visits.
        current_url = self.driver.current_url
        self._add_to_history(current_url)
        if self._is_loop_detected():
            logger.warning(f"检测到URL访问循环,当前URL: {current_url}")
            return

        original_window = self.driver.current_window_handle
        nav_elements = self.get_nav_elements()
        logger.info(f"在深度 {depth} 找到 {len(nav_elements)} 个导航元素")

        # Once every navigation element has been clicked the loop falls through
        # without recursing any further.
        for elem in nav_elements:
            elem_id = self.elem.get_element_id(elem)
            try:
                if elem_id in clicked or not (elem.is_displayed() and elem.is_enabled()):
                    continue

                before_url = self.driver.current_url
                self.elem.scroll_to(elem)
                self.elem.highlight(elem)
                self.elem.safe_click(elem)
                clicked.add(elem_id)
                logger.info(f"点击导航元素: {elem_id} (深度: {depth})")

                # Leave pages on the skip list immediately.
                current_url = self.driver.current_url
                if self.skip_url(current_url):
                    continue

                self.elem.handle_alert()
                self.elem.close_new_windows(original_window)
                time.sleep(0.5)
                self.elem.handle_inputs()

                # Recurse: click everything on the page that is now showing.
                self.click_all(clicked, depth + 1)

                # Go back if the click navigated away.
                if self.driver.current_url != before_url:
                    self.driver.back()
                    time.sleep(0.5)

            except Exception as e:
                logger.warning(f"点击导航元素{elem_id}失败: {e}")
                continue

    def click_all(self, clicked=None, depth=0):
        """Click every clickable, non-sensitive element on the page, recursing into new pages.

        Args:
            clicked: shared set of element identities already handled; a new
                set is created only when None is passed.
            depth: current recursion depth.

        Stops early on reaching max_depth, max_loop_count, or a detected URL loop.
        """
        if depth > self.max_depth:
            logger.info(f"达到最大递归深度 {self.max_depth},停止点击")
            return

        # `clicked or set()` would discard a caller's empty set and break sharing.
        if clicked is None:
            clicked = set()

        # Count this call against the global loop budget.
        self.loop_counter += 1
        if self.loop_counter >= self.max_loop_count:
            logger.warning(f"达到最大循环次数 {self.max_loop_count},停止操作")
            return

        # Detect cyclic URL visits.
        current_url = self.driver.current_url
        self._add_to_history(current_url)
        if self._is_loop_detected():
            logger.warning(f"检测到URL访问循环,当前URL: {current_url}")
            return

        # Navigation elements are clicked first.
        self.click_navs(clicked, depth)

        logger.info(f"开始处理页面: {current_url} (深度: {depth})")
        original_url = current_url
        original_window = self.driver.current_window_handle

        elements = self.elem.get_clickable()
        logger.info(f"在深度 {depth} 找到 {len(elements)} 个可点击元素")

        for elem in elements:
            try:
                elem_id = self.elem.get_element_id(elem)

                # Skip elements that were already clicked, are sensitive, or are not interactable.
                if elem_id in clicked or self.elem.is_sensitive(elem) or not (
                        elem.is_displayed() and elem.is_enabled()):
                    clicked.add(elem_id)
                    continue

                self.elem.scroll_to(elem)
                self.elem.highlight(elem)
                logger.info(f"点击元素: {elem_id} (深度: {depth})")

                self.elem.safe_click(elem)
                clicked.add(elem_id)

                # Leave pages on the skip list immediately.
                current_url = self.driver.current_url
                if self.skip_url(current_url):
                    continue

                self.elem.handle_alert()
                self.elem.close_new_windows(original_window)
                time.sleep(0.1)

                # If the click navigated to another page, process it recursively, then return.
                if self.driver.current_url != original_url:
                    self.click_all(clicked, depth + 1)
                    self.driver.back()
                    time.sleep(0.1)

            except Exception as e:
                logger.warning(f"点击元素失败: {e}")
                continue

        logger.info(f"完成页面处理: {original_url} (深度: {depth})")

    def run(self, password, login_required=True):
        """Run the whole workflow: optional login, traversal, optional logout; always quit the browser."""
        try:
            if login_required and self.login_url:
                if not self.login(password):
                    logger.error("登录失败,终止操作")
                    return

            logger.info("开始自动化操作...")
            self.click_all()
            time.sleep(2)

            if login_required and self.login_url:
                self.logout()

        except Exception as e:
            logger.error(f"自动化过程中发生错误: {e}")
        finally:
            logger.info("自动化操作完成,关闭浏览器")
            self.driver.quit()


# Script entry point.
if __name__ == "__main__":
    login_url = "http://10.244.4.37/"  # alternative target: "https://liaoxuefeng.com/index.html"
    password = "initpass"

    # The maximum depth and loop count can be tuned here.
    browser = BrowserConfig(browser_type='edge', headless=False)
    automation = AutomationWorkflow(
        browser,
        login_url,
        max_depth=10,  # maximum recursion depth
        max_loop_count=100  # maximum number of traversal calls
    )
    automation.run(password, login_required=True)
最新发布
08-30
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值