前提:因为我姐需要帮她挂机刷学时~而我自己又懒~于是就直接谷歌了一下…
在这位大神的前提下修改了一下:https://www.52pojie.cn/thread-1105553-1-1.html
主要是因为这位大神的代码会把所有未学习/正在学习的课程一次性全部打开窗口,而最新规则只允许同时打开一门课程进行学习,所以需要修改
因为自己学python 也不久.菜逼一个~
使用前需要安装浏览器对应的驱动,代码使用的是谷歌浏览器,因此自行百度/谷歌如何下载安装谷歌浏览器驱动~
python建议直接安装Anaconda吧,就不用每个都pip了~
ps:记得弄个网络好哒~我这边没考虑啥报错的问题~如果遇上什么鹏博士那些的话就节哀吧~
代码如下:
from selenium import webdriver
import pyautogui
import time
import math
def switch2frame(par):
    """Descend from the current document into the courseware-list iframe.

    The frames are entered one after another, in this fixed order:
    ``secondIframe`` -> ``thirdIframe`` -> ``dataMainIframe``.

    Args:
        par: Object exposing ``switch_to.frame(name)``; the callers in this
            script pass the Selenium WebDriver.
    """
    for frame_name in ('secondIframe', 'thirdIframe', 'dataMainIframe'):
        par.switch_to.frame(frame_name)
def run_main(video_unstudy_num, browser):
    """Play the unfinished courseware videos one at a time until none are left.

    For each pending video the function clicks the first
    ``courseware-list-reed`` entry, switches to the window that opens,
    dismisses the intro dialog, starts playback if it is paused, sleeps for
    the remaining play time, clicks the exit button, and then re-reads the
    pending count from the refreshed list page.

    Args:
        video_unstudy_num: Number of pending videos currently shown in the
            list (anything accepted by ``int()``).
        browser: Selenium WebDriver already switched into the list iframe.
    """
    # Local import keeps this function self-contained. The
    # find_element_by_* helpers used previously were removed in Selenium 4.3+.
    from selenium.webdriver.common.by import By

    js_list = 'return document.getElementsByClassName("courseware-list-reed").length;'
    js_click = 'document.getElementsByClassName("courseware-list-reed")[0].click()'
    js_paused = 'return document.getElementById("my-video_html5_api").paused;'
    js_duration_str = 'return document.getElementById("my-video_html5_api").duration;'
    js_current_time_str = 'return document.getElementById("my-video_html5_api").currentTime;'

    # A loop instead of one recursion level per video keeps the stack flat.
    while int(video_unstudy_num) > 0:
        print("nonlocal--该目录下还有{}个视频未学习……".format(video_unstudy_num))
        pre_window_handle = browser.current_window_handle
        handles_before = set(browser.window_handles)
        browser.execute_script(js_click)
        time.sleep(3)

        # Prefer the window that appeared after the click; fall back to any
        # window other than the list page.
        other_handles = [h for h in browser.window_handles if h != pre_window_handle]
        new_handles = [h for h in other_handles if h not in handles_before]
        candidates = new_handles or other_handles
        if not candidates:
            # Without a player window there is nothing to watch; stop rather
            # than click the same entry forever.
            print("未检测到新打开的视频窗口,停止学习。")
            return

        browser.switch_to.window(candidates[0])
        browser.implicitly_wait(10)
        # NOTE(review): the link text must match the page exactly (including
        # the comma character) - confirm against the live site.
        browser.find_element(By.LINK_TEXT, '好的,我知道了').click()
        time.sleep(2)
        browser.switch_to.frame('course_frame')
        time.sleep(10)

        # paused == True means the video has not started; press play.
        view_paused_status = browser.execute_script(js_paused)
        print('viewPaused:' + str(view_paused_status))
        if view_paused_status:
            browser.find_element(By.CLASS_NAME, "vjs-play-control").click()
        time.sleep(5)

        view_time = browser.execute_script(js_duration_str)
        print('viewTime:' + str(view_time))
        time.sleep(5)
        view_current_time = browser.execute_script(js_current_time_str)
        print('viewCurrentTime:' + str(view_current_time))

        # Sleep only for the part of the video that is still to be played.
        seconds_left = math.ceil(view_time) - math.ceil(view_current_time)
        if seconds_left <= 0:
            print('1111')
        else:
            print('2222')
            time.sleep(seconds_left)

        # Leave the player and go back to the list page.
        browser.switch_to.default_content()
        browser.find_element(By.ID, 'btnexit').click()
        browser.switch_to.window(pre_window_handle)
        browser.refresh()
        browser.implicitly_wait(10)
        switch2frame(browser)
        video_unstudy_num = browser.execute_script(js_list)
        time.sleep(3)

    print("该目录下的视频已全部学习完毕……")
def main():
    """Log in with a manually typed captcha and play all pending videos.

    Opens Chrome (audio muted), fills in the login form, asks the user for
    the captcha through a ``pyautogui`` prompt, opens the study page and
    hands over to ``run_main``. The browser session is always shut down,
    even when a step raises.
    """
    # Local import keeps this function self-contained. The
    # find_element_by_* helpers used previously were removed in Selenium 4.3+.
    from selenium.webdriver.common.by import By

    # Account name.
    username = "*****"
    # Password.
    passwd = "*****"
    login_url = 'https://gbpx.gd.gov.cn/gdceportal/index.aspx'

    option = webdriver.ChromeOptions()
    option.add_argument('--mute-audio')
    browser = webdriver.Chrome(options=option)
    try:
        browser.get(login_url)
        browser.implicitly_wait(10)
        browser.maximize_window()

        # presumably switches the login panel to account/password mode - verify
        browser.find_element(By.XPATH, '//*[@id="pnlLogin"]/div[1]/div[2]').click()
        time.sleep(1)

        elem = browser.find_element(By.ID, "txtLoginName")
        elem.clear()
        elem.send_keys(username)
        time.sleep(1)

        elem = browser.find_element(By.ID, "txtPassword")
        elem.clear()
        elem.send_keys(passwd)
        time.sleep(1)

        # Captcha is read by the human operator.
        code_num = pyautogui.prompt("请输入验证码:")
        if code_num is None:
            # The prompt was cancelled; send_keys(None) would raise.
            print("未输入验证码,已取消登录。")
            return
        elem = browser.find_element(By.ID, "txtValid")
        elem.clear()
        elem.send_keys(code_num)

        browser.find_element(By.XPATH, '//*[@id="user-login-form"]/div[2]/input[1]').click()
        time.sleep(3)
        browser.find_element(By.ID, 'btnStudy').click()
        time.sleep(3)

        switch2frame(browser)
        time.sleep(1)
        js_list = 'return document.getElementsByClassName("courseware-list-reed").length;'
        video_unstudy_num = browser.execute_script(js_list)
        time.sleep(3)
        run_main(video_unstudy_num, browser)
    finally:
        # quit() ends the whole driver session, so nothing is left running
        # after a failure.
        browser.quit()
    print("end......")
# Run the automation only when this file is executed as a script.
if __name__ == "__main__":
    main()
然后在玩这个的同时,又想着能不能直接让python自己去识别验证码呢?
于是乎就在上面的代码基础上,弄了一个自动识别验证码的脚本,但是pytesseract识别率是真的差(可能我百度来的脚本不行?)
使用下面的脚本前,需要先安装tesseract-ocr,怎么下载安装记得百度噢
代码如下:
from selenium import webdriver
import pyautogui
import time
import math
from PIL import Image # 用于打开图片和对图片处理
import pytesseract # 用于图片转文字
import re
def switch2frame(par):
    """Descend from the current document into the courseware-list iframe.

    The frames are entered one after another, in this fixed order:
    ``secondIframe`` -> ``thirdIframe`` -> ``dataMainIframe``.

    Args:
        par: Object exposing ``switch_to.frame(name)``; the callers in this
            script pass the Selenium WebDriver.
    """
    for frame_name in ('secondIframe', 'thirdIframe', 'dataMainIframe'):
        par.switch_to.frame(frame_name)
def processing_image(img_obj):
    """Convert an image to grayscale and binarise it.

    Every pixel darker than the threshold (160) becomes black (0); all other
    pixels become white (255).

    Args:
        img_obj: Image object providing ``convert`` (presumably a PIL
            ``Image``; the caller passes the cropped captcha).

    Returns:
        The grayscale image returned by ``convert("L")``, binarised in place.
    """
    print('转灰度1')
    gray = img_obj.convert("L")
    print('转灰度2')
    pixels = gray.load()
    width, height = gray.size
    cutoff = 160
    for row in range(height):
        for col in range(width):
            pixels[col, row] = 0 if pixels[col, row] < cutoff else 255
    return gray
def delete_spot(img_obj):
    """Binarise the image and whiten isolated dark pixels.

    A pixel with value below 50 is set to white (255) when none of its four
    direct neighbours (up, left, down, right) has a value below 10. Pixels on
    the outermost rows and columns are not examined.

    Args:
        img_obj: Image passed on to ``processing_image``.

    Returns:
        The image returned by ``processing_image``, modified via ``putpixel``.
    """
    print('delete_spot')
    images = processing_image(img_obj)
    data = images.getdata()
    w, h = images.size
    # Column-major traversal, same order as before.
    # NOTE(review): whether `data` reflects earlier putpixel calls depends on
    # Pillow internals - confirm before changing the traversal order.
    for x in range(1, w - 1):
        for y in range(1, h - 1):
            centre = w * y + x
            if data[centre] >= 50:
                continue
            # Flat indices of the up, left, down and right neighbours.
            neighbours = (centre - w, centre - 1, centre + w, centre + 1)
            if not any(data[idx] < 10 for idx in neighbours):
                images.putpixel((x, y), 255)
    return images
def image_str(img_obj, tesseract_cmd=r"D:\Program Files (x86)\Tesseract-OCR\tesseract"):
    """OCR a captcha image and return the first four recognised characters.

    The image is denoised with ``delete_spot`` and passed to
    ``pytesseract.image_to_string``. Every character that is not a digit, an
    ASCII letter, or in the range U+4E00-U+9FA5 is then stripped from the
    result.

    Args:
        img_obj: Captcha image passed on to ``delete_spot``.
        tesseract_cmd: Path of the tesseract executable. Defaults to the
            Windows install path the script was written for. Pass ``None``
            or ``""`` to leave pytesseract's current setting untouched.

    Returns:
        At most four characters of cleaned OCR text (possibly fewer or empty).
    """
    print("image_str")
    image = delete_spot(img_obj)
    if tesseract_cmd:
        pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
    # Image -> text.
    result = pytesseract.image_to_string(image)
    print(result)
    # Drop whitespace, punctuation and any other stray characters.
    resultj = re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", result)
    print(resultj)
    return resultj[0:4]
def run_main(video_unstudy_num, browser):
    """Play the unfinished courseware videos one at a time until none are left.

    For each pending video the function clicks the first
    ``courseware-list-reed`` entry, switches to the window that opens,
    dismisses the intro dialog, starts playback if it is paused, sleeps for
    the remaining play time, clicks the exit button, and then re-reads the
    pending count from the refreshed list page.

    Args:
        video_unstudy_num: Number of pending videos currently shown in the
            list (anything accepted by ``int()``).
        browser: Selenium WebDriver already switched into the list iframe.
    """
    # Local import keeps this function self-contained. The
    # find_element_by_* helpers used previously were removed in Selenium 4.3+.
    from selenium.webdriver.common.by import By

    js_list = 'return document.getElementsByClassName("courseware-list-reed").length;'
    js_click = 'document.getElementsByClassName("courseware-list-reed")[0].click()'
    js_paused = 'return document.getElementById("my-video_html5_api").paused;'
    js_duration_str = 'return document.getElementById("my-video_html5_api").duration;'
    js_current_time_str = 'return document.getElementById("my-video_html5_api").currentTime;'

    # A loop instead of one recursion level per video keeps the stack flat.
    while int(video_unstudy_num) > 0:
        print("nonlocal--该目录下还有{}个视频未学习……".format(video_unstudy_num))
        pre_window_handle = browser.current_window_handle
        handles_before = set(browser.window_handles)
        browser.execute_script(js_click)
        time.sleep(3)

        # Prefer the window that appeared after the click; fall back to any
        # window other than the list page.
        other_handles = [h for h in browser.window_handles if h != pre_window_handle]
        new_handles = [h for h in other_handles if h not in handles_before]
        candidates = new_handles or other_handles
        if not candidates:
            # Without a player window there is nothing to watch; stop rather
            # than click the same entry forever.
            print("未检测到新打开的视频窗口,停止学习。")
            return

        browser.switch_to.window(candidates[0])
        browser.implicitly_wait(10)
        # NOTE(review): the link text must match the page exactly (including
        # the comma character) - confirm against the live site.
        browser.find_element(By.LINK_TEXT, '好的,我知道了').click()
        time.sleep(2)
        browser.switch_to.frame('course_frame')
        time.sleep(10)

        # paused == True means the video has not started; press play.
        view_paused_status = browser.execute_script(js_paused)
        print('viewPaused:' + str(view_paused_status))
        if view_paused_status:
            browser.find_element(By.CLASS_NAME, "vjs-play-control").click()
        time.sleep(5)

        view_time = browser.execute_script(js_duration_str)
        print('viewTime:' + str(view_time))
        time.sleep(5)
        view_current_time = browser.execute_script(js_current_time_str)
        print('viewCurrentTime:' + str(view_current_time))

        # Sleep only for the part of the video that is still to be played.
        seconds_left = math.ceil(view_time) - math.ceil(view_current_time)
        if seconds_left <= 0:
            print('1111')
        else:
            print('2222')
            time.sleep(seconds_left)

        # Leave the player and go back to the list page.
        browser.switch_to.default_content()
        browser.find_element(By.ID, 'btnexit').click()
        browser.switch_to.window(pre_window_handle)
        browser.refresh()
        browser.implicitly_wait(10)
        switch2frame(browser)
        video_unstudy_num = browser.execute_script(js_list)
        time.sleep(3)

    print("该目录下的视频已全部学习完毕……")
def main():
    """Log in with an OCR-recognised captcha and play all pending videos.

    Opens Chrome (audio muted), fills in the login form, screenshots the
    page, crops the captcha image out of the screenshot, recognises it with
    ``image_str``, submits the form, opens the study page and hands over to
    ``run_main``. The browser session is always shut down, even when a step
    raises.
    """
    # Local import keeps this function self-contained. The
    # find_element_by_* helpers used previously were removed in Selenium 4.3+.
    from selenium.webdriver.common.by import By

    # Account name.
    username = "****"
    # Password.
    passwd = "*****"
    login_url = 'https://gbpx.gd.gov.cn/gdceportal/index.aspx'

    option = webdriver.ChromeOptions()
    option.add_argument('--mute-audio')
    browser = webdriver.Chrome(options=option)
    try:
        browser.get(login_url)
        browser.implicitly_wait(10)
        browser.maximize_window()

        # presumably switches the login panel to account/password mode - verify
        browser.find_element(By.XPATH, '//*[@id="pnlLogin"]/div[1]/div[2]').click()
        time.sleep(1)

        elem = browser.find_element(By.ID, "txtLoginName")
        elem.clear()
        elem.send_keys(username)
        time.sleep(1)

        elem = browser.find_element(By.ID, "txtPassword")
        elem.clear()
        elem.send_keys(passwd)
        time.sleep(1)

        # Hide the two header elements before taking the screenshot.
        js_display = 'document.getElementsByClassName("signup_header2")[0].style.display = "none";' \
                     'document.getElementsByClassName("signup_header")[0].style.display = "none";'
        browser.execute_script(js_display)

        # Screenshot the page; the captcha is cut out of it below.
        browser.get_screenshot_as_file('pictures.png')
        page_snap_obj = Image.open('pictures.png')

        # Position and size of the captcha element, in CSS pixels.
        elem = browser.find_element(By.XPATH, '//*[@id="imgValid"]')
        time.sleep(1)
        location = elem.location
        print("location:" + str(location))
        size = elem.size
        print("size:" + str(size))

        # The screenshot is in device pixels, so scale the CSS coordinates by
        # the browser's pixel ratio. This was hard-coded to 1.5, which only
        # fits a display at 150% scaling.
        scale = browser.execute_script('return window.devicePixelRatio;') or 1
        left = location['x'] * scale
        top = location['y'] * scale
        right = left + size['width'] * scale
        bottom = top + size['height'] * scale

        # Crop the captcha and recognise it.
        image_obj = page_snap_obj.crop((left, top, right, bottom))
        code_num = image_str(image_obj)

        elem = browser.find_element(By.ID, "txtValid")
        elem.clear()
        elem.send_keys(code_num)

        browser.find_element(By.XPATH, '//*[@id="user-login-form"]/div[2]/input[1]').click()
        time.sleep(3)
        browser.find_element(By.ID, 'btnStudy').click()
        time.sleep(3)

        switch2frame(browser)
        time.sleep(1)
        js_list = 'return document.getElementsByClassName("courseware-list-reed").length;'
        video_unstudy_num = browser.execute_script(js_list)
        time.sleep(3)
        run_main(video_unstudy_num, browser)
    finally:
        # quit() ends the whole driver session, so nothing is left running
        # after a failure.
        browser.quit()
    print("end......")
# Run the automation only when this file is executed as a script.
if __name__ == "__main__":
    main()
好了~你以为就完了?
其实在弄这两个脚本的时候,我试过直接用Js脚本无限循环请求,也是可以达到目的的…
因为这个学习系统是用scorm标准记录学习时间及其进度,但是你如果看了他的播放页面源码,可以知道无非就是一直请求这几个URL:
https://url/resplay/resCoursse/createRes (请求课程信息)
https://url/resplay/resCoursse/heartbeat(间隔1分钟提交一个你的学习时间,叠加时间提交的)
https://url/resplay/resCoursse/finished(在关闭页面时提交的)
url是啥你自行去找到…我怕犯法…
然后测试可以用postman 去(post)请求一下
https://url/resplay/resCoursse/createRes (请求课程信息)
请求参数:
labelId–在播放页面URL上面有,至于它是固定的默认值还是每个人都不一样,我就不知道了~
courseId–科目id,在播放页面URL上面有
请求头:
token–你账号的token
https://url/resplay/resCoursse/heartbeat(间隔1分钟提交一个你的学习时间,叠加时间提交的)
https://url/resplay/resCoursse/finished(在关闭页面时提交的)
请求参数:
labelId–在播放页面URL上面有,至于它是固定的默认值还是每个人都不一样,我就不知道了~
courseId–科目id,在播放页面URL上面有
event–>beat
scoData–>{"cmi.core.session_time":"10:44:02","cmi.core.lesson_location":56079}
我的博客即将同步至腾讯云+社区,邀请大家一同入驻:https://cloud.tencent.com/developer/support-plan?invite_code=dj4ezspgtlkv

