Ch7_Search_and_Sort_code

This post walks through the implementation of several classic searching and sorting algorithms, with code for each: binary search (BinarySearch, both iterative and recursive), insertion sort (Insert), selection sort (SelectionSort), bubble sort (BubbleSort), Shell sort (ShellSort), quicksort (QuickSort), heapsort (HeapSort), merge sort (MergeSort), and a quickselect routine (Find_ith_min).


BinarySearch_Iteration

int BinarySearch_Iteration(int A[], int key, int n)
{
    int l = 1;  //l: left boundary
    int r = n;  //r: right boundary
    while(l <= r)
    {
        int m = (l + r)/2;
        if(key == A[m])
            return m;       //found
        else if(key < A[m])
            r = m - 1;      //search the left half: move the right boundary to m-1
        else
            l = m + 1;      //search the right half: move the left boundary to m+1
    }
    return 0; //key not found (the array is 1-indexed, so 0 means "missing")
}

BinarySearch_Recursive

int BinarySearch_Recursive(int A[], int l, int r, int key)
{
    if(l <= r)
    {
        int m = (l + r)/2;
        if(key == A[m])
            return m;                                        //found
        else if(key < A[m])
            return BinarySearch_Recursive(A, l, m-1, key);   //recurse on the left half
        else
            return BinarySearch_Recursive(A, m+1, r, key);   //recurse on the right half
    }
    return 0; //key not found
}
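
A minimal usage sketch (my own example, not from the original): the keys are stored 1-indexed in A[1..n], so index 0 doubles as the "not found" return value.

#include <stdio.h>

int main(void)
{
    int A[] = {0, 3, 7, 12, 19, 25, 31};  //A[1..6] holds the sorted keys; A[0] is unused
    int n = 6;
    printf("%d\n", BinarySearch_Iteration(A, 19, n));    //prints 4
    printf("%d\n", BinarySearch_Recursive(A, 1, n, 8));  //prints 0 (8 is not in the array)
    return 0;
}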

Insert

#include <limits.h>   //for INT_MIN, used as the sentinel below

void Insert(int A[], int r, int i)
{
    //insert r into the already-sorted prefix A[1..i] (A[0] holds the sentinel)
    int j = i;
    while(r < A[j]) //compare from the largest element backwards
    {
        A[j+1] = A[j]; //shift elements larger than r one slot to the right
        j--;           //move left
    }
    A[j+1] = r;
}
void Insert_main(int A[], int n)
{
    A[0] = INT_MIN;   //sentinel: keeps the scan from running off the left end when r is the minimum so far
    for(int i = 2; i <= n; i++)
    {
        Insert(A, A[i], i-1);  //insert the i-th element into the sorted prefix, starting from the 2nd
    }
}
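
A hedged usage sketch (assumed, not from the original): the caller leaves slot A[0] free for the sentinel that Insert_main writes.

#include <stdio.h>

int main(void)
{
    int A[] = {0, 5, 2, 9, 1, 7};  //A[0] is a placeholder for the sentinel; data in A[1..5]
    Insert_main(A, 5);
    for(int i = 1; i <= 5; i++)
        printf("%d ", A[i]);       //prints 1 2 5 7 9
    return 0;
}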

SelectionSort

void SelectionSort(int A[], int n)
{
    for(int i = 1; i <= n; i++)
    {
        int min = i;
        for(int j = i+1; j <= n; j++)
        {
            if(A[j] < A[min])  //smaller than the current minimum: remember its index
            {
                min = j;
            }
        }
        if(min != i)
        {
            swap(A[i], A[min]);
        }
    }
}
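
SelectionSort and the routines that follow call swap() on array elements, but the original never defines it. A minimal sketch that keeps the call sites unchanged is a macro (an assumption on my part; a function taking int* and called with &A[i] would work equally well). Define it before the sorting routines when compiling:

#define swap(x, y) do { int _t = (x); (x) = (y); (y) = _t; } while(0)  //exchange two int lvalues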

BubbleSort

void BubbleSort(int A[], int n)
{
    for(int i = 1; i <= n-1; i++)  //at most (n-1) passes
    {
        int f = 0;                 //flag: did this pass swap anything?
        for(int j = 1; j <= n-i; j++)
        {
            if(A[j] > A[j+1])
            {
                swap(A[j], A[j+1]);
                f = 1;
            }
        }
        if(f == 0)
        {
            break;  //no swaps in this pass: the array is already sorted
        }
    }
}

ShellSort

void ShellSort(int A[], int n)
{
    int span = n/2;
    int f;
    while(span >= 1)  //keep halving the gap until span = 1 has been processed
    {
        do
        {
            f = 0;  //reset f each pass to detect whether any swap happened
            for(int i = 1; i <= n-span; i++)
            {
                if(A[i] > A[i+span])  //out of order within this gap
                {
                    swap(A[i], A[i+span]);
                    f = 1;
                }
            }
        }
        while(f != 0); //stop when a pass makes no swaps
        span = span/2;
    }
}

QuickSort_DS

void QuickSort_DS(int A[], int l, int r)
{
    //note: the classic version assumes a sentinel >= every key sits at A[n+1],
    //so the i-scan below cannot run past the right end on the top-level call
    if(l < r)
    {
        int pk = A[l];  //use the leftmost element as the pivot (pk)
        int i = l;
        int j = r + 1;
        do
        {
            do
            {
                i = i + 1;   //scan to the right
            }
            while(A[i] < pk);  //until an element not smaller than pk is found
            do
            {
                j = j - 1;    //scan to the left
            }
            while(A[j] > pk);  //until an element not larger than pk is found
            if(i < j)
            {
                swap(A[i], A[j]);  //exchange the out-of-place pair
            }
        }
        while(i < j);
        swap(A[l], A[j]); //put the pivot into its final position
        QuickSort_DS(A, l, j-1); //quicksort the left part
        QuickSort_DS(A, j+1, r); //quicksort the right part
    }
}
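
A hedged usage sketch (my own example): the sentinel mentioned above is supplied as INT_MAX in the slot just past the data.

#include <stdio.h>
#include <limits.h>

int main(void)
{
    int A[] = {0, 5, 3, 8, 1, 9, 2, INT_MAX};  //data in A[1..6]; A[7] is the sentinel
    QuickSort_DS(A, 1, 6);
    for(int i = 1; i <= 6; i++)
        printf("%d ", A[i]);                    //prints 1 2 3 5 8 9
    return 0;
}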

QuickSort_Algo

int Partition(int A[], int p, int r);  //prototype: defined below

void QuickSort_Algo(int A[], int p, int r)
{
    if(p < r)
    {
        int q = Partition(A, p, r);
        QuickSort_Algo(A, p, q-1);
        QuickSort_Algo(A, q+1, r);
    }
}
int Partition(int A[], int p, int r)
{
    int x = A[r];   //use the rightmost element as the pivot (pk)
    int i = p - 1;  //one slot before p, to simplify the index bookkeeping
    for(int j = p; j <= r-1; j++) //scan from the front up to r-1
    {
        if(A[j] <= x)        //element is <= the pivot
        {
            i = i + 1;       //advance i first
            swap(A[i], A[j]);//then exchange
        }
    }
    swap(A[r], A[i+1]);      //move the pivot to the slot just after the last element that was <= it
    return (i+1);            //return the pivot's final position
}
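
A brief worked example of a single Partition call (my own trace, not from the original): the pivot A[8] = 4 ends up at index 4, with smaller keys to its left and larger keys to its right.

#include <stdio.h>

int main(void)
{
    int A[] = {0, 2, 8, 7, 1, 3, 5, 6, 4};  //data in A[1..8]; pivot is A[8] = 4
    int q = Partition(A, 1, 8);
    printf("q = %d\n", q);                   //q = 4
    for(int i = 1; i <= 8; i++)
        printf("%d ", A[i]);                 //prints 2 1 3 4 7 5 6 8
    return 0;
}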

MergeSort_Recursive

void MergeSort_Recursive(int A[], int l, int u, int p[])
{   //sort A[l..u] and write the result into the new run p[0 .. u-l]
    if(l >= u)  //only one element
    {
        p[0] = A[l];
        return;
    }
    else
    {
        int m = (l+u)/2;
        int q[m-l+1], r[u-m];                   //buffers for the two sub-runs (C99 VLAs)
        MergeSort_Recursive(A, l, m, q);        //sort A[l..m] into the new run q[]
        MergeSort_Recursive(A, m+1, u, r);      //sort A[m+1..u] into the new run r[]
        MergeTwoRuns(q, m-l+1, r, u-m, p);      //merge q[] and r[] into p[]
    }
}
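
The original calls MergeTwoRuns() without defining it. A minimal sketch, assuming it receives the two sorted runs together with their lengths plus the output buffer (my interface, matching the call above); define or declare it before MergeSort_Recursive when compiling:

void MergeTwoRuns(int q[], int nq, int r[], int nr, int p[])
{
    //merge the sorted runs q[0..nq-1] and r[0..nr-1] into p[0..nq+nr-1]
    int i = 0, j = 0, k = 0;
    while(i < nq && j < nr)
        p[k++] = (q[i] <= r[j]) ? q[i++] : r[j++];  //take the smaller head element
    while(i < nq)
        p[k++] = q[i++];                            //copy whatever is left of q
    while(j < nr)
        p[k++] = r[j++];                            //copy whatever is left of r
}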

HeapSort

void HeapSort(int tree[], int n)
{
    for(int i = n/2; i >= 1; i--)
    {
        AdjustHeap(tree, i, n);  //build a max-heap bottom-up (the heap occupies tree[1..n])
    }
    for(int i = n-1; i >= 1; i--)
    {
        swap(tree[1], tree[i+1]); //move the root (the maximum) to the end of the current heap
        AdjustHeap(tree, 1, i);   //restore the max-heap property on the remaining i elements
    }
}
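
AdjustHeap() is used above but not shown. Below is a minimal sift-down sketch for a 1-indexed max-heap, under my assumption about its interface (restore the heap property for the subtree rooted at i within tree[1..n]); define or declare it before HeapSort when compiling. After HeapSort finishes, tree[1..n] is in ascending order.

void AdjustHeap(int tree[], int i, int n)
{
    int key = tree[i];   //value being sifted down
    int j = 2*i;         //left child of i (children of k are 2k and 2k+1)
    while(j <= n)
    {
        if(j < n && tree[j+1] > tree[j])
            j = j + 1;            //pick the larger of the two children
        if(key >= tree[j])
            break;                //subtree already satisfies the max-heap property
        tree[j/2] = tree[j];      //promote the child into the hole
        j = 2*j;                  //move the hole down one level
    }
    tree[j/2] = key;              //drop the key into its final slot
}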

Find_ith_min

int Find_ith_min(int A[], int p, int r, int i)
{
    //find the i-th smallest element in A[p..r]
    int q = Partition(A, p, r); //q is the pivot's final (sorted) position
    int k = q - p + 1;          //the pivot is the k-th smallest of this subarray
    if(i == k)
    {
        return A[q];
    }
    else if(i < k)
    {
        return Find_ith_min(A, p, q-1, i);
    }
    else
    {
        return Find_ith_min(A, q+1, r, i-k);  //i-k: the k smallest elements are excluded from the right part
    }
}
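
A hedged usage sketch (my own example; note that the call rearranges A, since it reuses Partition from above):

#include <stdio.h>

int main(void)
{
    int A[] = {0, 9, 2, 7, 4, 6};              //data in A[1..5]
    printf("%d\n", Find_ith_min(A, 1, 5, 3));  //3rd smallest -> prints 6
    return 0;
}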