Adding the university library's database-access pac to the ssr pac

When I'm at home I need to use a proxy, but I also need to access the library's databases, and the two proxies conflict a bit. I looked into ssr's pac rules myself and found they are fairly simple.

You can study the code posted at the end of this page (pac.txt).

Internally a pac file is just plain JavaScript logic. I'm not very familiar with it, so the code is a bit stiff; feel free to optimize it yourself.

Execution starts from FindProxyForURL, which then decides how each request is proxied based on its url and host.

Following the school proxy's matching rules, each url is checked against the pac rules published by the library. Links to the databases are routed through "PROXY proxy2.lib.whu.edu.cn:81"; the first time a connection is established, you have to authenticate with your library account and password.
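
(For reference: a pac return value is just a routing string. "DIRECT" connects straight to the target, "PROXY host:port" goes through an HTTP proxy, and entries can be chained as fallbacks, e.g. "PROXY proxy2.lib.whu.edu.cn:81; DIRECT" tries the library proxy first and then falls back to a direct connection.)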

All remaining links go through the normal ssr checks and are proxied as usual.

The demo code:


// pac files use plain JavaScript syntax

// check the site, then choose where to proxy it
var direct = "DIRECT;";

var wall_proxy = function () {
	return "__PROXY__";
};
var wall_v6_proxy = function () {
	return "__PROXY__";
};

var ip_proxy = function () {
	return wall_proxy();
};
var ipv6_proxy = function () {
	return wall_v6_proxy();
};
var nowall_proxy = function () {
	return direct;
};
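
Note on "__PROXY__": as far as I can tell, the ssr client treats it as a placeholder and substitutes the actual local proxy string (something like "SOCKS5 127.0.0.1:1080;") when it generates the final pac, so the file itself never needs a hardcoded port.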

/*
 * Copyright (C) 2014 breakwa11
 * https://github.com/breakwa11/gfw_whitelist
 */

// Pairs of [start, end) integer ranges (see convertAddress below) that
// should always connect directly: private and loopback addresses.
var subnetIpRangeList = [
0,1,
167772160,184549376,	//10.0.0.0/8
2886729728,2887778304,	//172.16.0.0/12
3232235520,3232301056,	//192.168.0.0/16
2130706432,2130706688	//127.0.0.0/24
];

var hasOwnProperty = Object.hasOwnProperty;

function check_ipv4(host) {
	// dotted-quad shape check (no per-octet range validation needed here)
	var re_ipv4 = /^\d+\.\d+\.\d+\.\d+$/;
	return re_ipv4.test(host);
}
function check_ipv6(host) {
	// bare or bracketed ipv6 literal
	var re_ipv6 = /^\[?([a-fA-F0-9]{0,4}\:){1,7}[a-fA-F0-9]{0,4}\]?$/;
	return re_ipv6.test(host);
}
function check_ipv6_dns(dnsstr) {
	// ipv6 address anywhere in a dns result string (optionally with a %zone)
	var re_ipv6 = /([a-fA-F0-9]{0,4}\:){1,7}[a-fA-F0-9]{0,4}(%[0-9]+)?/;
	return re_ipv6.test(dnsstr);
}
function convertAddress(ipchars) {
	// pack "a.b.c.d" into an unsigned 32-bit integer
	var bytes = ipchars.split('.');
	var result = (bytes[0] << 24) |
		(bytes[1] << 16) |
		(bytes[2] << 8) |
		(bytes[3]);
	return result >>> 0;
}
function isInSubnetRange(ipRange, intIp) {
	// ipRange holds [start, end) pairs; walk them two at a time
	for (var i = 0; i < ipRange.length; i += 2) {
		if (ipRange[i] <= intIp && intIp < ipRange[i + 1])
			return true;
	}
	return false;
}
function getProxyFromIP(strIp) {
	// private/loopback ranges connect directly; everything else is proxied
	var intIp = convertAddress(strIp);
	if (isInSubnetRange(subnetIpRangeList, intIp)) {
		return direct;
	}
	return wall_proxy();
}
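
// Quick sanity check of the whitelist logic (values follow from the
// ranges in subnetIpRangeList):
// getProxyFromIP("192.168.1.10") -> "DIRECT;"    (inside 192.168.0.0/16)
// getProxyFromIP("8.8.8.8")      -> "__PROXY__"  (public address, proxied)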

// Entry point: the pac engine calls this function for every request
function FindProxyForURL(url, host) {

	// Library database hosts go through the library proxy. WHU_lib_proxy()
	// (defined below) returns false exactly for hosts in the library acl,
	// so the "!" routes those, and only those, through it. The first
	// connection asks for your library account and password.
	var whu_lib_proxy = "PROXY proxy2.lib.whu.edu.cn:81";
	if (!WHU_lib_proxy(url, host))
	{
		return whu_lib_proxy;
	}

	// plain hostnames (no dots) are local: connect directly
	if (isPlainHostName(host) === true) {
		return direct;
	}
	if (check_ipv4(host) === true) {
		return getProxyFromIP(host);
	}
	if (check_ipv6(host) === true) {
		return ipv6_proxy();
	}

	// resolve the hostname; if resolution fails, assume it is blocked
	var strIp = dnsResolve(host);
	if (!strIp) {
		return wall_proxy();
	}

	return getProxyFromIP(strIp);
}

function FindProxyForURLEx(url, host) {
	if (isPlainHostName(host) === true) {
		return direct;
	}
	if (check_ipv4(host) === true) {
		return getProxyFromIP(host);
	}
	if (check_ipv6(host) === true) {
		return ipv6_proxy();
	}

	// dnsResolveEx may return several addresses separated by ";"
	var strIp = dnsResolveEx(host);
	if (!strIp) {
		return wall_proxy();
	}
	if (check_ipv6_dns(strIp) === true) {
		return ipv6_proxy();
	}
	var dnsIps = strIp.split(";");
	if (check_ipv4(dnsIps[0]) === true) {
		return getProxyFromIP(dnsIps[0]);
	} else if (check_ipv6_dns(dnsIps[0]) === true) {
		return ipv6_proxy();
	}
	return wall_proxy();
}
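
// FindProxyForURLEx, as I understand it, is the IPv6-aware entry point from
// the Microsoft pac extensions: engines that support dnsResolveEx call it
// instead of FindProxyForURL, and since dnsResolveEx can return a
// ";"-separated address list, the first entry is split off above.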

// pac for whu lib
// Version: 20200117-14:14
function WHU_lib_proxy(url, host) {

	// Decide whether to treat the request as inside the WHU network:
	// true  -> fall through to the normal ssr rules,
	// false -> send it through the library proxy.
	var in_whu_net = true;
	var not_in_whu_net = false;

	var whu_lib_proxy = "PROXY proxy2.lib.whu.edu.cn:81";

// Case: the client is already on the campus network. The whole block below
// is commented out here, since this pac is meant for off-campus use.
/* 
	if
	(
		//begin of direct client ip
		isInNet(myIpAddress(), "202.114.64.0", "255.255.255.0") ||
		isInNet(myIpAddress(), "202.114.66.0", "255.255.254.0") ||
		isInNet(myIpAddress(), "202.114.68.0", "255.255.252.0") ||
		isInNet(myIpAddress(), "202.114.72.0", "255.255.248.0") ||
		isInNet(myIpAddress(), "202.114.96.0", "255.255.240.0") ||
		isInNet(myIpAddress(), "218.197.144.0", "255.255.255.0") ||
		isInNet(myIpAddress(), "218.197.146.0", "255.255.254.0") ||
		isInNet(myIpAddress(), "218.197.148.0", "255.255.252.0") ||
		isInNet(myIpAddress(), "218.197.152.0", "255.255.248.0") ||
		isInNet(myIpAddress(), "222.20.192.0", "255.255.192.0") ||
		isInNet(myIpAddress(), "125.220.128.0", "255.255.224.0") ||
		isInNet(myIpAddress(), "192.168.254.0", "255.255.255.255"))
		return in_whu_net;
		//end of direct client ip;
	else if (
		//begin of direct name acl
		dnsDomainIs(host, "2shusheng.lib.whu.edu.cn") ||
		dnsDomainIs(host, "apps.lib.whu.edu.cn") ||
		dnsDomainIs(host, "counter.lib.whu.edu.cn") ||
		dnsDomainIs(host, "iras.lib.whu.edu.cn") ||
		dnsDomainIs(host, "metalib.lib.whu.edu.cn") ||
		dnsDomainIs(host, "opac.lib.whu.edu.cn") ||
		dnsDomainIs(host, "proxy2.lib.whu.edu.cn") ||
		dnsDomainIs(host, "proxy.lib.whu.edu.cn") ||
		dnsDomainIs(host, "seat.lib.whu.edu.cn") ||
		dnsDomainIs(host, "svpn.lib.whu.edu.cn") ||
		dnsDomainIs(host, "www.lib.whu.edu.cn") ||
		dnsDomainIs(host, "xenapp.lib.whu.edu.cn") ||
		dnsDomainIs(host, "ztcb.lib.whu.edu.cn") ||
		dnsDomainIs(host, ".dummy-domain1.domain"))
		return in_whu_net;
		//end of direct name acl;
	else if (
		//begin of ip name acl
		isInNet(host,"102.114.65.149","255.255.255.255") ||
		isInNet(host,"102.114.65.55","255.255.255.255") ||
		isInNet(host,"202.114.65.106","255.255.255.255") ||
		isInNet(host,"202.114.65.215","255.255.255.255") ||
		isInNet(host,"202.114.65.63","255.255.255.255") ||
		isInNet(host,"202.114.65.9","255.255.255.255") ||
		isInNet(host,"192.168.253.252","255.255.255.252"))
		return in_whu_net;
		//end of direct ip acl;
*/

// Case: off campus. Hosts matched below belong to the library's database
// acl and are routed through the library proxy.
if (
	//begin of domain name acl
	dnsDomainIs(host, ".51cto.com") || host == "51cto.com" ||
	dnsDomainIs(host, ".51sjsj.com") || host == "51sjsj.com" ||
	dnsDomainIs(host, ".5cy.com") || host == "5cy.com" ||
	dnsDomainIs(host, ".5read.com") || host == "5read.com" ||
	dnsDomainIs(host, ".aacnjournals.org") || host == "aacnjournals.org" ||
	dnsDomainIs(host, ".aacrjournals.org") || host == "aacrjournals.org" ||
	dnsDomainIs(host, ".aacr.org") || host == "aacr.org" ||
	dnsDomainIs(host, ".aappublications.org") || host == "aappublications.org" ||
	dnsDomainIs(host, ".accessmedicine.com") || host == "accessmedicine.com" ||
	dnsDomainIs(host, ".accesspharmacy.com") || host == "accesspharmacy.com" ||
	dnsDomainIs(host, ".accesssurgery.com") || host == "accesssurgery.com" ||
	dnsDomainIs(host, ".acgpublishing.com") || host == "acgpublishing.com" ||
	dnsDomainIs(host, ".acm.org") || host == "acm.org" ||
	dnsDomainIs(host, ".acpjc.org") || host == "acpjc.org" ||
	dnsDomainIs(host, ".acponline.org") || host == "acponline.org" ||
	dnsDomainIs(host, ".acs.org") || host == "acs.org" ||
	dnsDomainIs(host, ".acs.org.ccindex.cn") || host == "acs.org.ccindex.cn" ||
	dnsDomainIs(host, ".a.ebscohost.com") || host == "a.ebscohost.com" ||
	dnsDomainIs(host, "a.example.com") || host == "a.example.com" ||
	dnsDomainIs(host, ".agu.org") || host == "agu.org" ||
	dnsDomainIs(host, ".aiaa.org") || host == "aiaa.org" ||
	dnsDomainIs(host, ".aidsonline.com") || host == "aidsonline.com" ||
	dnsDomainIs(host, ".aip.org") || host == "aip.org" ||
	dnsDomainIs(host, ".airiti.com.cn") || host == "airiti.com.cn" ||
	dnsDomainIs(host, ".airitilibrary.cn") || host == "airitilibrary.cn" ||
	dnsDomainIs(host, ".airitinpm.com") || host == "airitinpm.com" ||
	dnsDomainIs(host, ".ajax.googleapis.com") || host == "ajax.googleapis.com" ||
	dnsDomainIs(host, "ajax..googleapis.com") || host == "ajax..googleapis.com" ||
	dnsDomainIs(host, ".ajcn.org") || host == "ajcn.org" ||
	dnsDomainIs(host, ".ajhp.org") || host == "ajhp.org" ||
	dnsDomainIs(host, ".ajnr.org") || host == "ajnr.org" ||
	dnsDomainIs(host, ".ajtmh.org") || host == "ajtmh.org" ||
	dnsDomainIs(host, ".akademiai.com") || host == "akademiai.com" ||
	dnsDomainIs(host, ".alexanderstreet.com") || host == "alexanderstreet.com" ||
	dnsDomainIs(host, ".allenpress.com") || host == "allenpress.com" ||
	dnsDomainIs(host, ".alphamedpress.org") || host == "alphamedpress.org" ||
	dnsDomainIs(host, ".ama-assn.org") || host == "ama-assn.org" ||
	dnsDomainIs(host, ".amdigital.co.uk") || host == "amdigital.co.uk" ||
	dnsDomainIs(host, ".amjbot.org") || host == "amjbot.org" ||
	dnsDomainIs(host, ".amjmedsci.com") || host == "amjmedsci.com" ||
	dnsDomainIs(host, ".amjpathol.org") || host == "amjpathol.org" ||
	dnsDomainIs(host, ".ams.org") || host == "ams.org" ||
	dnsDomainIs(host, ".anatomy.tv") || host == "anatomy.tv" ||
	dnsDomainIs(host, ".anb.org") || host == "anb.org" ||
	dnsDomainIs(host, ".angle.com.tw") || host == "angle.com.tw" ||
	dnsDomainIs(host, ".annals.org") || host == "annals.org" ||
	dnsDomainIs(host, ".annualreviews.org") || host == "annualreviews.org" ||
	dnsDomainIs(host, ".apabi.com") || host == "apabi.com" ||
	dnsDomainIs(host, ".aps.org") || host == "aps.org" ||
	dnsDomainIs(host, ".archive.nstl.gov.cn") || host == "archive.nstl.gov.cn" ||
	dnsDomainIs(host, ".artlib.cn") || host == "artlib.cn" ||
	dnsDomainIs(host, ".asahi.com") || host == "asahi.com" ||
	dnsDomainIs(host, ".ascelibrary.org") || host == "ascelibrary.org" ||
	dnsDomainIs(host, ".asce.org") || host == "asce.org" ||
	dnsDomainIs(host, ".asme.org") || host == "asme.org" ||
	dnsDomainIs(host, ".asm.org") || host == "asm.org" ||
	dnsDomainIs(host, ".aspbjournals.org") || host == "aspbjournals.org" ||
	dnsDomainIs(host, ".aspetjournals.org") || host == "aspetjournals.org" ||
	dnsDomainIs(host, ".astm.org") || host == "astm.org" ||
	dnsDomainIs(host, ".atypon-link.com?") || host == "atypon-link.com?" ||
	dnsDomainIs(host, 
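
To sanity-check the routing outside a browser, you can stub the helper functions that the pac engine normally provides and run the file under Node. A minimal sketch (the stubs below are my assumptions for testing only, they are not part of pac.txt; append them temporarily at the end of the file):

// --- test stubs, not part of pac.txt ---
function dnsDomainIs(host, domain) {
	// true when host ends with domain (the classic pac helper behaviour)
	return host.length >= domain.length &&
		host.substring(host.length - domain.length) === domain;
}
function isPlainHostName(host) { return host.indexOf(".") === -1; }
function dnsResolve(host) { return null; } // pretend resolution failed

console.log(FindProxyForURL("http://dl.acm.org/doi", "dl.acm.org"));
// -> "PROXY proxy2.lib.whu.edu.cn:81"  (acm.org is in the library acl)
console.log(FindProxyForURL("http://example.org/", "example.org"));
// -> "__PROXY__"  (dns stubbed to fail, so the wall proxy is returned)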