1. 程式人生 > 其它 > 雨課堂自動播放視訊爬蟲

雨課堂自動播放視訊爬蟲

import time
import json
import os
from functools import partial

from selenium import webdriver
from concurrent.futures import ThreadPoolExecutor, as_completed


def get_cookies():
    """
    獲取cookies儲存至本地
    """
    browser = webdriver.Firefox()
    log_url = 'https://www.yuketang.cn/web'
    browser.get(log_url)
    time.sleep(
15) # 進行掃碼 dictCookies = browser.get_cookies() # 獲取list的cookies jsonCookies = json.dumps(dictCookies) # 轉換成字串儲存 with open('cookies.txt', 'w') as f: f.write(jsonCookies) print('cookies儲存成功!') def login(driver, url): driver.get(url) time.sleep(10) # 2 account_login_button = driver.find_element_by_xpath(
'/html/body/div[4]/div[2]/div/div[2]/div/div/div/section[2]/div/div/div[2]/div/xt-wrap/xt-controls/xt-inner/xt-playbutton') account_login_button.click() time.sleep(500) title = driver.title print(title) driver.quit() return title def browser_initial(): """" 瀏覽器初始化,並開啟大麥網購票介面(未登入狀態)
""" options = webdriver.FirefoxOptions() # options.add_argument("--headless") # 設定火狐為headless無介面模式 # options.add_argument("--disable-gpu") browser = webdriver.Firefox(options=options) browser.get( 'https://www.yuketang.cn/web') # 從本地讀取cookies並重新整理頁面,成為已登入狀態 with open('./cookies.txt', 'r', encoding='utf8') as f: listCookies = json.loads(f.read()) # 往browser裡新增cookies for cookie in listCookies: cookie_dict = { 'domain': 'www.yuketang.cn', 'name': cookie.get('name'), 'value': cookie.get('value'), 'sessionid': cookie.get('value'), "expires": '', 'path': '/', 'httpOnly': False, 'HostOnly': False, 'Secure': False, 'sameSite': 'None' } browser.add_cookie(cookie_dict) browser.refresh() # 重新整理網頁,cookies才成功 return browser if __name__ == '__main__': # 儲存COOKIES 首次執行先執行這兩句 if not os.path.exists('cookies.txt'): get_cookies() browser = browser_initial() # 上面兩句執行完畢後即可執行下面兩句 start = 12449817 # 視訊起始編號 end = 12449820 # 視訊結束編號74 urls = [ f'https://www.yuketang.cn/v2/web/xcloud/video-student/7698212/{index}' for index in range(start, end) ] print(urls) #partial固定引數 part = partial(login,browser=browser) with ThreadPoolExecutor() as pool: results=pool.map(part,urls) for result in results: print(result)