写在前面
上次用 selenium爬虫写了个 快捷面板,太复杂,正好上次用requests登录了教务系统,就顺便把一些功能给写了,因为懒得写UI,直接弄成了命令行工具。
主要功能包括:
- 1、查询本学期成绩
- 2、查询选课列表
- 3、查询绩点排名
- 4、一键教学评估
其中第2个功能是为了以后一键抢课功能做准备的,等选课系统开放就开搞!
前置要求
需要百度OCR的API,主要为了自动识别验证码登录,所以要去
https://login.bce.baidu.com/?account=&redirect=http%3A%2F%2Fconsole.bce.baidu.com%2F%3Ffromai%3D1#/aip/overview
去创建一个文字识别应用,每天有50000次免费调用机会
创建完成可以获得上述
代码
修改以下 “ *** ” 部分为自己的信息即可,如果教务系统网址崩了,可以换一个。
所有需要填的信息都在开头。
import re
import time
import math
import requests
from aip import AipOcr
from bs4 import BeautifulSoup
class req_spider:
homeurl = "http://202.119.113.136/" #教务系统网址
account = "***" #学号
password = "***" #密码
### 以下为百度OCR API
APP_ID = '***'
API_KEY = '***'
SECRET_KEY = '***'
post_url = homeurl + 'loginAction.do'
yzm_url = homeurl + 'validateCodeAction.do'
def __init__(self):
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}
self.session = requests.Session()
def get_yzm(self):
with open('yzm.png', 'wb') as f:
f.write(self.session.get(self.yzm_url, headers=self.headers).content)
def discern_yzm(self):
client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
def get_file_content(file_path):
with open(file_path, 'rb') as f:
return f.read()
image = get_file_content('yzm.png')
options = {'language_type': 'ENG', }
# 调用通用文字识别
result = client.basicGeneral(image, options) # 高精度接口 basicAccurate
for word in result['words_result']:
yzm = (word['words'])
yzm = re.sub('[\W_]+', '', yzm)
yzm.replace(" ", "")
return yzm
def login(self):
while True:
try:
self.get_yzm()
yzm = self.discern_yzm()
post_data = {
'zjh': self.account,
'mm': self.password,
'v_yzm': yzm,
}
loginResponse = self.session.post(self.post_url, data=post_data, headers=self.headers)
if ("学分制综合教务" in loginResponse.text):
print("登录成功!")
break
else:
print("验证码错误,正在重新登录~")
except:
print("出错啦,不是你账号密码错了就是服务器炸了o(╥﹏╥)o")
print("请检查一下账号密码及app相关填写,若正确则无需理会")
print("无限循环重试中~")
time.sleep(1)
def get_grades(self):
gradesResponse = self.session.get(self.homeurl + "bxqcjcxAction.do")
soup = BeautifulSoup(gradesResponse.text, 'lxml')
courses = [[cell.get_text(strip=True) for cell in row.find_all('td')]
for row in soup.find_all("tr", class_=True)]
print("#############################本学期成绩#############################")
print("{0:^15}\t{1:^5}\t{2:^10}\t{3:^5}\t{4:^5}\t{5:^5}"
.format('课程名', '学分', '课程属性', '课程平均分', '成绩', '名次', chr(12288)))
for course in courses:
course[2] = ''.join(re.findall('[\u4e00-\u9fa5]', course[2])) # 正则匹配中文汉字
print("{0:{6}<10}\t{1:^5}\t{2:^10}\t{3:^15}\t{4:^5}\t{5:^5}"
.format(course[2], course[4], course[5], course[8], course[9], course[10], chr(12288)))
print("##################################################################")
def get_courses(self):
coursesResponse = self.session.get(self.homeurl + 'xkAction.do?actionType=6')
soup = BeautifulSoup(coursesResponse.text, 'lxml')
courses = [[cell.get_text(strip=True) for cell in row.find_all('td')[1:8]]
for row in soup.find_all("tr", class_=True)]
print("#########################################选课列表#########################################")
print("{0:^10}\t{1:{6}<20}\t{2:{6}^4}\t{3:^5}\t{4:^10}\t{5:{6}^5}"
.format('课程号', '课程名', '课序号', '学分', '课程属性', '教师', chr(12288)))
for course in courses:
if course[1].isdigit() == False:
course[1] = ''.join(re.findall('[\u4e00-\u9fa5]', course[1])) # 正则匹配中文汉字
print("{0:^10}\t{1:{6}<20}\t{2:^10}\t{3:^5}\t{4:^10}\t{5:{6}^10}"
.format(course[0], course[1], course[2], course[3], course[4], course[6], chr(12288)))
print("########################################################################################")
def tech_pg(self):
post_data = {
'wjbm': '0000000345',
'bpr': '20140056',
'pgnr': '0601064',
}
tech_listResponse = self.session.get(self.homeurl + 'jxpgXsAction.do?oper=listJspm')
soup = BeautifulSoup(tech_listResponse.text, 'lxml')
links = soup.find_all('a')
cnt = 0
for link in links:
if cnt & 1:
print("正在评估第"+str(math.ceil(cnt/2))+"位老师~")
nums = re.findall(r'\d+', str(link))
nums.sort(key=lambda i: len(i), reverse=True)
post_data['wjbm'] = nums[0]
post_data['bpr'] = nums[1]
post_data['pgnr'] = nums[2]
self.session.post(self.homeurl + 'jxpgXsAction.do?oper=wjShow', data=post_data,
headers=self.headers)
if nums[0] == "0000000349": # 实践教学
post_data['0000000262'] = '15_1'
post_data['0000000263'] = '20_1'
post_data['0000000264'] = '15_1'
post_data['0000000265'] = '20_1'
post_data['0000000266'] = '15_1'
post_data['0000000267'] = '15_1'
post_data['0000000309'] = '0_1'
post_data['zgpj'] = "good"
elif nums[0] == "0000000345": # 课堂教学
post_data['0000000249'] = '15_1'
post_data['0000000250'] = '20_1'
post_data['0000000251'] = '20_1'
post_data['0000000252'] = '15_1'
post_data['0000000253'] = '15_1'
post_data['0000000254'] = '15_1'
post_data['0000000307'] = '0_1'
post_data['zgpj'] = "good"
elif nums[0] == "0000000351": # 体育教学
post_data['0000000280'] = '15_1'
post_data['0000000281'] = '20_1'
post_data['0000000282'] = '20_1'
post_data['0000000283'] = '15_1'
post_data['0000000284'] = '15_1'
post_data['0000000285'] = '15_1'
post_data['0000000313'] = '0_1'
post_data['zgpj'] = "good"
elif nums[0] == "0000000350": # 公共选修
post_data['0000000274'] = '15_1'
post_data['0000000275'] = '20_1'
post_data['0000000276'] = '20_1'
post_data['0000000277'] = '15_1'
post_data['0000000278'] = '15_1'
post_data['0000000279'] = '15_1'
post_data['0000000310'] = '0_1'
post_data['zgpj'] = "good"
else:
print("未处理此数据,请联系制作者")
continue
post_data['xumanyzg'] = 'zg'
post_data['wjbz'] = ''
self.session.post(self.homeurl + 'jxpgXsAction.do?oper=wjpg', data=post_data,
headers=self.headers)
cnt += 1
print("评估完成!请前往教务系统确认,如果没全部评估,请再运行一次!")
print("没有排序会剩一个,请自己排序~")
def get_rank(self):
rankResponse = self.session.get(self.homeurl + "reportFiles/bzrcx/jdpmcx.jsp?temp=1")
soup = BeautifulSoup(rankResponse.text, 'lxml')
infos = [[cell.get_text(strip=True) for cell in row.find_all('td')]
for row in soup.find_all("tr")]
print("####################绩点排名####################")
print("{:<10}\t{:<10}\t{:<10}\t{:<10}".
format(infos[-2][1] + ":" + infos[-1][1],
infos[-2][2] + ":" + infos[-1][2],
infos[-2][3] + ":" + infos[-1][3],
infos[-2][4] + ":" + infos[-1][4]))
print("{:<10}\t{:<10}\t{:<10}".
format(infos[-2][5] + ":" + infos[-1][5],
infos[-2][6] + ":" + infos[-1][6],
infos[-2][7] + ":" + infos[-1][7]))
print("{:<10}\t{:<10}\t{:<10}".
format(infos[-2][8] + ":" + infos[-1][8],
infos[-2][9] + ":" + infos[-1][9],
infos[-2][10] + ":" + infos[-1][10]))
print("##############################################")
if __name__ == '__main__':
user = req_spider()
user.login()
while True:
print("欢迎来到hhu命令行面板:")
print("输入数字:1 查询成绩 2 查询选课列表 3 绩点排名 4 教学评估 0 退出")
try:
flag = int(input())
if flag == 1:
user.get_grades()
elif flag == 2:
user.get_courses()
elif flag == 3:
user.get_rank()
elif flag ==4:
user.tech_pg()
elif flag == 0:
break
else:
print("ERROR:")
print("请输入0-4之间的数字!!!\n")
except:
print("ERROR:")
print("请输入数字!!!\n")