前提:因为我姐需要帮她挂机刷学时~而我自己又懒~于是就直接谷歌了一下…
在这位大神的前提下修改了一下:https://www.52pojie.cn/thread-1105553-1-1.html
主要是这位大神的代码会把所有未学习/正在学习的课程一次性全部打开窗口,不符合最新规则(同一时间只能打开一门课程进行学习)
因为自己学python 也不久.菜逼一个~
使用前需要安装浏览器对应的驱动,代码使用的是谷歌浏览器,因此自行百度/谷歌如何下载安装谷歌浏览器驱动~
python建议直接安装Anaconda吧,就不用每个都pip了~
ps:记得弄个网络好哒~我这边没考虑啥报错的问题~如果遇上什么鹏博士那些的话就节哀吧~
代码如下:
import math
import time

import pyautogui
from selenium import webdriver
from selenium.webdriver.common.by import By

# JavaScript snippets executed in the page. The "courseware-list-reed" class
# is what the script counts as courseware that still has to be studied.
JS_CLICK_FIRST_UNSTUDIED = 'document.getElementsByClassName("courseware-list-reed")[0].click()'
JS_COUNT_UNSTUDIED = 'return document.getElementsByClassName("courseware-list-reed").length;'
JS_VIDEO_PAUSED = 'return document.getElementById("my-video_html5_api").paused;'
JS_VIDEO_DURATION = 'return document.getElementById("my-video_html5_api").duration;'
JS_VIDEO_CURRENT_TIME = 'return document.getElementById("my-video_html5_api").currentTime;'


def switch2frame(par):
    """Descend into the nested iframes secondIframe > thirdIframe > dataMainIframe.

    Args:
        par: the WebDriver whose frame context is switched. The caller must
            be at the top-level document of the study page.
    """
    for frame_name in ('secondIframe', 'thirdIframe', 'dataMainIframe'):
        par.switch_to.frame(frame_name)


def _read_video_seconds(browser, script, attempts=10):
    """Run *script* until it yields a finite number and return that number.

    A media element reports no usable duration until its metadata is loaded,
    and ``math.ceil`` fails on None/NaN/inf, so poll once per second instead
    of trusting the first answer.

    Raises:
        RuntimeError: if no finite number was obtained after *attempts* tries.
    """
    value = None
    for _ in range(attempts):
        value = browser.execute_script(script)
        if isinstance(value, (int, float)) and math.isfinite(value):
            return value
        time.sleep(1)
    raise RuntimeError(
        "video player returned no usable time value: {!r}".format(value))


def _study_in_window(browser, main_handle, player_handle):
    """Play one video to its end in *player_handle* and go back to the list.

    Returns:
        The number of "courseware-list-reed" entries found in the course
        list after the list window has been refreshed.
    """
    browser.switch_to.window(player_handle)
    browser.implicitly_wait(10)
    # The player shows an acknowledgement link that has to be clicked first.
    browser.find_element(By.LINK_TEXT, '好的,我知道了').click()
    time.sleep(2)
    browser.switch_to.frame('course_frame')
    time.sleep(10)

    # paused == True means the video is not running yet, so press play.
    paused = browser.execute_script(JS_VIDEO_PAUSED)
    print('viewPaused:' + str(paused))
    if paused:
        browser.find_element(By.CLASS_NAME, "vjs-play-control").click()
    time.sleep(5)

    duration = _read_video_seconds(browser, JS_VIDEO_DURATION)
    print('viewTime:' + str(duration))
    time.sleep(5)
    position = _read_video_seconds(browser, JS_VIDEO_CURRENT_TIME)
    print('viewCurrentTime:' + str(position))

    # Sleep through whatever is left of the video; nothing to wait for when
    # the playback position is already at (or past) the end.
    seconds_left = math.ceil(duration) - math.ceil(position)
    if seconds_left > 0:
        print('2222')
        time.sleep(seconds_left)
    else:
        print('1111')

    # Leave the player through its exit button, then return to the
    # course-list window and re-read how much is still unstudied.
    browser.switch_to.default_content()
    browser.find_element(By.ID, 'btnexit').click()
    browser.switch_to.window(main_handle)
    browser.refresh()
    browser.implicitly_wait(10)
    switch2frame(browser)
    unstudied = browser.execute_script(JS_COUNT_UNSTUDIED)
    time.sleep(3)
    return unstudied


def run_main(video_unstudy_num, browser):
    """Study the unstudied videos one at a time until none are left.

    Iterative on purpose: each video is opened, watched and closed before
    the next one is started, so only one course window is open at a time
    and the call stack does not grow with the number of videos.

    Args:
        video_unstudy_num: number of unstudied entries currently listed.
        browser: WebDriver already switched into the course-list iframe.
    """
    remaining = int(video_unstudy_num)
    while remaining > 0:
        print("nonlocal--该目录下还有{}个视频未学习……".format(remaining))
        main_handle = browser.current_window_handle
        known_handles = set(browser.window_handles)
        browser.execute_script(JS_CLICK_FIRST_UNSTUDIED)
        time.sleep(3)

        # Prefer the window that the click just opened; otherwise fall back
        # to any window other than the course list.
        handles = browser.window_handles
        candidates = ([h for h in handles if h not in known_handles]
                      or [h for h in handles if h != main_handle])
        if not candidates:
            # Without a player window the loop would click forever.
            print("未检测到新打开的播放窗口,停止学习。")
            return
        remaining = int(_study_in_window(browser, main_handle, candidates[0]))
    print("该目录下还有视频已学习完毕……")


def _type_into(browser, element_id, text):
    """Clear the input with id *element_id* and type *text* into it."""
    field = browser.find_element(By.ID, element_id)
    field.clear()
    field.send_keys(text)


def main():
    """Log in (captcha typed by the user) and study every unstudied video."""
    # Account name
    username = "*****"
    # Password
    passwd = "*****"
    login_url = 'https://gbpx.gd.gov.cn/gdceportal/index.aspx'

    option = webdriver.ChromeOptions()
    option.add_argument('--mute-audio')
    browser = webdriver.Chrome(options=option)
    try:
        browser.get(login_url)
        browser.implicitly_wait(10)
        browser.maximize_window()

        browser.find_element(By.XPATH, '//*[@id="pnlLogin"]/div[1]/div[2]').click()
        time.sleep(1)
        _type_into(browser, "txtLoginName", username)
        time.sleep(1)
        _type_into(browser, "txtPassword", passwd)
        time.sleep(1)

        # The captcha is read by the user and typed into a prompt dialog;
        # prompt() gives None when the dialog is cancelled.
        code_num = pyautogui.prompt("请输入验证码:")
        if code_num is None:
            print("未输入验证码,已取消登录。")
            return
        _type_into(browser, "txtValid", code_num)
        browser.find_element(
            By.XPATH, '//*[@id="user-login-form"]/div[2]/input[1]').click()
        time.sleep(3)

        browser.find_element(By.ID, 'btnStudy').click()
        time.sleep(3)
        switch2frame(browser)
        time.sleep(1)
        video_unstudy_num = browser.execute_script(JS_COUNT_UNSTUDIED)
        time.sleep(3)
        run_main(video_unstudy_num, browser)
    finally:
        # quit() also stops the chromedriver process; close() would only
        # close the current window.
        browser.quit()
    print("end......")


if __name__ == '__main__':
    main()
然后在玩这个的同时,又想着能不能直接让python自己去识别验证码呢?
于是乎就在上面的代码基础上,弄了一个自动识别验证码的脚本,但是pytesseract识别率是真的差(可能我百度来的脚本不行?)
使用下面的脚本前,需要先预装tesseract-ocr,怎么下载安装记得百度噢
代码如下:
import math
import os
import re
import time

import pyautogui  # kept from the original script; only the manual captcha prompt used it
import pytesseract  # image-to-text (OCR)
from PIL import Image  # opening and processing the screenshot
from selenium import webdriver
from selenium.webdriver.common.by import By

# Tesseract location used by the original script. It is applied only when it
# exists; otherwise pytesseract's default lookup is left untouched.
TESSERACT_CMD = r"D:\Program Files (x86)\Tesseract-OCR\tesseract"

# Everything that is not a CJK ideograph, a digit or an ASCII letter.
_NON_CAPTCHA_CHARS = re.compile(
    u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])")

# JavaScript snippets executed in the page. The "courseware-list-reed" class
# is what the script counts as courseware that still has to be studied.
JS_CLICK_FIRST_UNSTUDIED = 'document.getElementsByClassName("courseware-list-reed")[0].click()'
JS_COUNT_UNSTUDIED = 'return document.getElementsByClassName("courseware-list-reed").length;'
JS_VIDEO_PAUSED = 'return document.getElementById("my-video_html5_api").paused;'
JS_VIDEO_DURATION = 'return document.getElementById("my-video_html5_api").duration;'
JS_VIDEO_CURRENT_TIME = 'return document.getElementById("my-video_html5_api").currentTime;'
JS_HIDE_HEADERS = (
    'document.getElementsByClassName("signup_header2")[0].style.display = "none";'
    'document.getElementsByClassName("signup_header")[0].style.display = "none";'
)


def switch2frame(par):
    """Descend into the nested iframes secondIframe > thirdIframe > dataMainIframe.

    Args:
        par: the WebDriver whose frame context is switched. The caller must
            be at the top-level document of the study page.
    """
    for frame_name in ('secondIframe', 'thirdIframe', 'dataMainIframe'):
        par.switch_to.frame(frame_name)


def processing_image(img_obj):
    """Return a black-and-white copy of *img_obj*.

    The image is converted to 8-bit greyscale; every pixel darker than the
    threshold (160) becomes black (0) and every other pixel white (255).
    """
    print('转灰度1')
    grey = img_obj.convert("L")
    print('转灰度2')
    threshold = 160
    return grey.point(lambda value: 0 if value < threshold else 255)


def delete_spot(img_obj):
    """Binarise *img_obj* and erase isolated black pixels.

    A dark pixel (value < 50) that has no black pixel (value < 10) directly
    above, below, left or right of it is painted white. The outermost
    one-pixel border is never examined.
    """
    print('delete_spot')
    cleaned = processing_image(img_obj)
    pixels = cleaned.load()
    width, height = cleaned.size
    for x in range(1, width - 1):
        for y in range(1, height - 1):
            if pixels[x, y] >= 50:
                continue
            neighbours = (pixels[x, y - 1], pixels[x - 1, y],
                          pixels[x, y + 1], pixels[x + 1, y])
            if not any(value < 10 for value in neighbours):
                pixels[x, y] = 255
    return cleaned


def image_str(img_obj):
    """OCR the captcha image and return at most its first four characters.

    The image is cleaned with ``delete_spot`` first; characters other than
    CJK ideographs, digits and ASCII letters are stripped from the result.
    """
    print("image_str")
    image = delete_spot(img_obj)
    # Point pytesseract at the hard-coded install only if it is really there.
    if os.path.isfile(TESSERACT_CMD) or os.path.isfile(TESSERACT_CMD + ".exe"):
        pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    result = pytesseract.image_to_string(image)
    print(result)
    resultj = _NON_CAPTCHA_CHARS.sub("", result)
    print(resultj)
    return resultj[0:4]


def _read_video_seconds(browser, script, attempts=10):
    """Run *script* until it yields a finite number and return that number.

    A media element reports no usable duration until its metadata is loaded,
    and ``math.ceil`` fails on None/NaN/inf, so poll once per second instead
    of trusting the first answer.

    Raises:
        RuntimeError: if no finite number was obtained after *attempts* tries.
    """
    value = None
    for _ in range(attempts):
        value = browser.execute_script(script)
        if isinstance(value, (int, float)) and math.isfinite(value):
            return value
        time.sleep(1)
    raise RuntimeError(
        "video player returned no usable time value: {!r}".format(value))


def _study_in_window(browser, main_handle, player_handle):
    """Play one video to its end in *player_handle* and go back to the list.

    Returns:
        The number of "courseware-list-reed" entries found in the course
        list after the list window has been refreshed.
    """
    browser.switch_to.window(player_handle)
    browser.implicitly_wait(10)
    # The player shows an acknowledgement link that has to be clicked first.
    browser.find_element(By.LINK_TEXT, '好的,我知道了').click()
    time.sleep(2)
    browser.switch_to.frame('course_frame')
    time.sleep(10)

    # paused == True means the video is not running yet, so press play.
    paused = browser.execute_script(JS_VIDEO_PAUSED)
    print('viewPaused:' + str(paused))
    if paused:
        browser.find_element(By.CLASS_NAME, "vjs-play-control").click()
    time.sleep(5)

    duration = _read_video_seconds(browser, JS_VIDEO_DURATION)
    print('viewTime:' + str(duration))
    time.sleep(5)
    position = _read_video_seconds(browser, JS_VIDEO_CURRENT_TIME)
    print('viewCurrentTime:' + str(position))

    # Sleep through whatever is left of the video; nothing to wait for when
    # the playback position is already at (or past) the end.
    seconds_left = math.ceil(duration) - math.ceil(position)
    if seconds_left > 0:
        print('2222')
        time.sleep(seconds_left)
    else:
        print('1111')

    # Leave the player through its exit button, then return to the
    # course-list window and re-read how much is still unstudied.
    browser.switch_to.default_content()
    browser.find_element(By.ID, 'btnexit').click()
    browser.switch_to.window(main_handle)
    browser.refresh()
    browser.implicitly_wait(10)
    switch2frame(browser)
    unstudied = browser.execute_script(JS_COUNT_UNSTUDIED)
    time.sleep(3)
    return unstudied


def run_main(video_unstudy_num, browser):
    """Study the unstudied videos one at a time until none are left.

    Iterative on purpose: each video is opened, watched and closed before
    the next one is started, so only one course window is open at a time
    and the call stack does not grow with the number of videos.

    Args:
        video_unstudy_num: number of unstudied entries currently listed.
        browser: WebDriver already switched into the course-list iframe.
    """
    remaining = int(video_unstudy_num)
    while remaining > 0:
        print("nonlocal--该目录下还有{}个视频未学习……".format(remaining))
        main_handle = browser.current_window_handle
        known_handles = set(browser.window_handles)
        browser.execute_script(JS_CLICK_FIRST_UNSTUDIED)
        time.sleep(3)

        # Prefer the window that the click just opened; otherwise fall back
        # to any window other than the course list.
        handles = browser.window_handles
        candidates = ([h for h in handles if h not in known_handles]
                      or [h for h in handles if h != main_handle])
        if not candidates:
            # Without a player window the loop would click forever.
            print("未检测到新打开的播放窗口,停止学习。")
            return
        remaining = int(_study_in_window(browser, main_handle, candidates[0]))
    print("该目录下还有视频已学习完毕……")


def _type_into(browser, element_id, text):
    """Clear the input with id *element_id* and type *text* into it."""
    field = browser.find_element(By.ID, element_id)
    field.clear()
    field.send_keys(text)


def _capture_captcha(browser):
    """Screenshot the page and return the cropped captcha as a PIL image.

    The full-page screenshot is written to ``pictures.png`` in the current
    working directory and the area of the ``imgValid`` element is cut out.
    """
    # Hide the two header blocks before taking the screenshot.
    browser.execute_script(JS_HIDE_HEADERS)
    browser.get_screenshot_as_file('pictures.png')

    elem = browser.find_element(By.XPATH, '//*[@id="imgValid"]')
    time.sleep(1)
    location = elem.location
    print("location:"+str(location))
    size = elem.size
    print("size:" + str(size))

    # Element coordinates are CSS pixels, the screenshot is device pixels.
    # Ask the browser for the ratio; 1.5 (the value that used to be
    # hard-coded here) is kept as the fallback.
    scale = browser.execute_script('return window.devicePixelRatio;') or 1.5
    left = location['x'] * scale
    top = location['y'] * scale
    right = left + size['width'] * scale
    bottom = top + size['height'] * scale

    with Image.open('pictures.png') as page_snapshot:
        captcha = page_snapshot.crop((left, top, right, bottom))
        # Force the pixel data into memory before the file is closed.
        captcha.load()
    return captcha


def main():
    """Log in (captcha read by OCR) and study every unstudied video."""
    # Account name
    username = "****"
    # Password
    passwd = "*****"
    login_url = 'https://gbpx.gd.gov.cn/gdceportal/index.aspx'

    option = webdriver.ChromeOptions()
    option.add_argument('--mute-audio')
    browser = webdriver.Chrome(options=option)
    try:
        browser.get(login_url)
        browser.implicitly_wait(10)
        browser.maximize_window()

        browser.find_element(By.XPATH, '//*[@id="pnlLogin"]/div[1]/div[2]').click()
        time.sleep(1)
        _type_into(browser, "txtLoginName", username)
        time.sleep(1)
        _type_into(browser, "txtPassword", passwd)
        time.sleep(1)

        # Captcha: recognised automatically instead of being typed by hand.
        code_num = image_str(_capture_captcha(browser))
        _type_into(browser, "txtValid", code_num)
        browser.find_element(
            By.XPATH, '//*[@id="user-login-form"]/div[2]/input[1]').click()
        time.sleep(3)

        browser.find_element(By.ID, 'btnStudy').click()
        time.sleep(3)
        switch2frame(browser)
        time.sleep(1)
        video_unstudy_num = browser.execute_script(JS_COUNT_UNSTUDIED)
        time.sleep(3)
        run_main(video_unstudy_num, browser)
    finally:
        # quit() also stops the chromedriver process; close() would only
        # close the current window.
        browser.quit()
    print("end......")


if __name__ == '__main__':
    main()
好了~你以为就完了?
其实在弄这两个脚本的时候,我试过直接用JS脚本无限循环请求,也是可以达到同样效果的…
因为这个学习系统是用scorm标准记录学习时间及其进度,但是你如果看了他的播放页面源码,可以知道无非就是一直请求这几个URL:
https://url/resplay/resCoursse/createRes (请求课程信息)
https://url/resplay/resCoursse/heartbeat(间隔1分钟提交一个你的学习时间,叠加时间提交的)
https://url/resplay/resCoursse/finished(在关闭页面时提交的)
url是啥你自行去找到…我怕犯法…
然后测试可以用postman 去(post)请求一下
https://url/resplay/resCoursse/createRes (请求课程信息)
请求参数:
labelId–在播放页面URL上面有,因为是默认还是每个人不一样我就不知道了~
courseId–科目id,在播放页面URL上面有
请求头:
token–你账号的token
https://url/resplay/resCoursse/heartbeat(间隔1分钟提交一次你的学习时间,叠加时间提交的)、https://url/resplay/resCoursse/finished(在关闭页面时提交的)
请求参数:
labelId–在播放页面URL上面有,因为是默认还是每个人不一样我就不知道了~
courseId–科目id,在播放页面URL上面有
event–>beat
scoData–>{"cmi.core.session_time":"10:44:02","cmi.core.lesson_location":56079}
我的博客即将同步至腾讯云+社区,邀请大家一同入驻:https://cloud.tencent.com/developer/support-plan?invite_code=dj4ezspgtlkv