In his article "LoadRunner as a WebCrawler", Kim describes how to implement a simple web crawler with LoadRunner:
http://ptfrontline.wordpress.com/2008/04/07/loadrunner-as-a-webcrawler/
In performance testing, such a crawler is handy for "warming up" caches: crawl the site once before the measured run so that pages are already cached when the load test starts.
void Process_Level1()
{
    int i;
    char buf[2048];
    char buf2[2048];
    char *pos;
    int res;
    int count;

    count = atoi(lr_eval_string("{URL_LIST1_count}"));
    if (count > 0)
    {
        for (i = 1; i <= count; i++)
        {
            sprintf(buf, "{URL_LIST1_%d}", i);

            // Skip links the crawler should not follow,
            // e.g. absolute links that leave the site
            res = 0;
            strcpy(buf2, lr_eval_string(buf));
            pos = (char *)strstr(buf2, "http");
            if (pos > 0) res++;

            if (res == 0)
            {
                lr_save_string(lr_eval_string(buf), "URL");
                // Replace &amp; with & - NONSTANDARD FUNCTION
                lr_replace("URL", "&amp;", "&");

                web_reg_save_param("URL_LIST2",    // save all href="..." URLs
                    "LB=href=\"",
                    "RB=\"",
                    "Ord=All",
                    "Search=Body",
                    "NotFound=Warning",
                    LAST);

                web_url("URL",
                    "URL={BaseURL}{URL}",
                    "TargetFrame=",
                    "Resource=0",
                    "RecContentType=text/html",
                    "Mode=HTML",
                    LAST);

                // Process all "URL_LIST2" entries
                Process_Level2();
            }
        }
    }
}
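Kim's post stops at the first level and does not show Process_Level2(). A minimal sketch, assuming it simply repeats the same pattern one level down (walking URL_LIST2, then fetching each page), might look like this:

// Hypothetical sketch of the next level -- not from the original article.
// It assumes each level repeats the pattern of Process_Level1().
void Process_Level2()
{
    int i;
    char buf[2048];
    int count;

    count = atoi(lr_eval_string("{URL_LIST2_count}"));
    for (i = 1; i <= count; i++)
    {
        sprintf(buf, "{URL_LIST2_%d}", i);
        lr_save_string(lr_eval_string(buf), "URL");
        web_url("URL",
            "URL={BaseURL}{URL}",
            "TargetFrame=",
            "Resource=0",
            "RecContentType=text/html",
            "Mode=HTML",
            LAST);
    }
}

Hard-coding one function per level obviously does not scale, which is exactly what the recursive version below addresses.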
Vince Lozada refined the script, using recursion to visit each URL exactly once:
char **myList;
int numListElements = 0;
int listSize = 1;

// forward declarations so the helpers can be called before they are defined
void Process_URLs(int index);
void addItemToList(char *item);
int isItemInList(char *item);
void printList();

Action()
{
    int i;

    web_reg_save_param("URL_LIST1",
        "LB=href=\"",
        "RB=\"",
        "Ord=All",
        "Search=Body",
        "NotFound=Warning",
        LAST);

    web_url("Home Page",
        "URL={BaseURL}",
        "TargetFrame=",
        "Resource=0",
        "RecContentType=text/html",
        "Referer=",
        "Snapshot=t1.inf",
        "Mode=HTML",
        LAST);

    Process_URLs(1);

    // release the visited-URL list (the strings were malloc'ed in Process_URLs)
    for (i = 0; i < numListElements; i++)
        free(myList[i]);
    free(myList);
    myList = 0;
    numListElements = 0;
    listSize = 1;

    return 0;
}
void Process_URLs(int index)
{
    int i;
    int nextIndex;
    char listName[255];
    char listCountParamName[255];
    char listItemParamName[255];
    int count;
    char *resourceName;

    nextIndex = (index + 1);
    sprintf(listCountParamName, "{URL_LIST%d_count}", index);
    count = atoi(lr_eval_string(listCountParamName));

    if (count > 0) {
        for (i = 1; i <= count; i++) {
            sprintf(listItemParamName, "{URL_LIST%d_%d}", index, i);
            lr_save_string(lr_eval_string(listItemParamName), "URL");

            // only visit URLs we have not seen before
            if (isItemInList(lr_eval_string("{URL}")) == 0) {
                // duplicate the URL so the visited list owns its own copy
                // (sizeof a pointer would allocate only 4/8 bytes)
                char *str = (char *)malloc(strlen(lr_eval_string("{URL}")) + 1);
                strcpy(str, lr_eval_string("{URL}"));
                addItemToList(str);

                sprintf(listName, "URL_LIST%d", nextIndex);
                web_reg_save_param(listName,
                    "LB=href=\"",
                    "RB=\"",
                    "Ord=All",
                    "Search=Body",
                    "NotFound=Warning",
                    LAST);

                // use the last path segment as the step name;
                // fall back to the whole URL when it contains no '/'
                resourceName = (char *)strrchr(lr_eval_string("{URL}"), '/');
                if (resourceName == NULL)
                    resourceName = lr_eval_string("{URL}");

                web_url(resourceName,
                    "URL={BaseURL}{URL}",
                    "TargetFrame=",
                    "Resource=0",
                    "RecContentType=text/html",
                    "Mode=HTML",
                    LAST);

                Process_URLs(nextIndex);
            }
        }
    }
}
void addItemToList(char *item) {
    char **newList;
    int i;

    // lazily allocate the list on first use
    if (!myList) {
        myList = (char **)malloc(listSize * sizeof(char *));
    }

    // double the capacity whenever the list fills up
    if (++numListElements > listSize) {
        newList = (char **)malloc(listSize * 2 * sizeof(char *));
        for (i = 0; i < listSize; ++i) {
            newList[i] = myList[i];
        }
        listSize *= 2;
        free(myList);
        myList = newList;
    }

    myList[numListElements - 1] = item;
}
int isItemInList(char *item) {
    int i;

    for (i = 0; i < numListElements; ++i) {
        if (!strcmp(item, myList[i])) {
            return 1;
        }
    }
    return 0;
}
void printList() {
    int i;

    for (i = 0; i < numListElements; ++i) {
        // pass the URL as an argument, not as the format string,
        // in case it contains '%' characters
        lr_output_message("%s", myList[i]);
    }
}
I tried this script and found it is still not quite complete: the handling of extracted URL strings needs to be considered more carefully.
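For example, the script follows every href verbatim, including mailto:, javascript: and in-page anchor links. A hypothetical filter along these lines (the function name and rules are my own, not part of either script) could be called on each URL before the isItemInList() check:

// Hypothetical pre-filter for extracted hrefs -- an illustration of the
// kind of URL sanity checking the scripts still lack.
int isCrawlableURL(char *url)
{
    if (url == NULL || strlen(url) == 0)      return 0; // empty href
    if (strncmp(url, "http", 4) == 0)         return 0; // absolute link leaving {BaseURL}
    if (strncmp(url, "mailto:", 7) == 0)      return 0; // mail link
    if (strncmp(url, "javascript:", 11) == 0) return 0; // script pseudo-URL
    if (url[0] == '#')                        return 0; // in-page anchor
    return 1;
}

Real pages would need more rules than this (query strings, fragments in the middle of a path, relative ".." segments and so on), but even a simple whitelist of this kind keeps the crawler from wandering off-site or requesting non-page URLs.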