import os
import re
import argparse
import asyncio
from aiohttp import ClientSession
from bs4 import BeautifulSoup
# def findUrl(string):
# find all of url in this string
# url = re.findall('https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', string)
# return url
# Matches the scheme + host portion of an http/https URL: letters, digits,
# '-', '_', '.', and percent-encoded bytes. Path segments stop at the first
# '/', matching the original pattern's behavior. Raw string avoids the
# invalid '\w' / '\d' escape warnings the non-raw literal produced.
_URL_PATTERN = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')


def findUrl(text):
    """Extract all http(s) URLs from a string or a list of strings.

    Parameters:
        text: a str to scan, or a list whose items are scanned after
              being coerced with str().

    Returns:
        list[str]: every URL match, in order of appearance ([] if none).

    Raises:
        ValueError: if *text* is neither a str nor a list.
    """
    if isinstance(text, str):
        return _URL_PATTERN.findall(text)
    if isinstance(text, list):
        # extend() instead of repeated `url = url + ...`, which was
        # quadratic; the pattern is compiled once at module level.
        urls = []
        for item in text:
            urls.extend(_URL_PATTERN.findall(str(item)))
        return urls
    raise ValueError("Invalid type!")
def doWrk(request_url, number_of_requests, threads_num, request_time):
    """Print the wrk benchmark command line for a target URL.

    Only prints the ``./wrk`` invocation; nothing here executes it.
    NOTE(review): only the single-str branch is visible in this chunk —
    the function looks truncated by the page scrape (a list branch
    presumably follows) — TODO confirm against the full source.

    Parameters (all concatenated with '+', so callers must pass strings;
    passing ints would raise TypeError — assumption to confirm):
        request_url: target URL (str branch handled here).
        number_of_requests: wrk '-c' connections value.
        threads_num: wrk '-t' thread-count value.
        request_time: wrk '-d' duration value.
    """
    # a single url
    if isinstance(request_url, str):
        print("wrk server")
        print("./wrk -t " + threads_num + " -c " + number_of_requests + " -d " + request_time + " " + request_url)
Python GET页面并提取js中Url,对url进行wrk压测并行访问
最新推荐文章于 2024-04-03 21:41:00 发布
本文介绍了如何使用Python解析网页中的JavaScript,提取URL,并利用wrk工具进行并行访问压力测试。通过这个过程,读者可以了解到网络爬虫和性能测试的基本方法。

最低0.47元/天 解锁文章
9027

被折叠的评论
为什么被折叠?



