1. 程式人生 > 其它 >使用Selenium破解新浪微博的四宮格驗證碼

使用Selenium破解新浪微博的四宮格驗證碼

在我們編寫爬蟲的時候經常會遇到驗證碼,新浪微博登入時的驗證碼是四宮格形式。

可以採用模板匹配的破解方式,也就是把驗證碼所有可能的情況全部列出來,然後拿實際驗證碼的圖片和這些情況的圖片逐一對比,找出對應的拖動順序,再通過selenium自動拖拽點選,進行破解。

我們將驗證碼四個點標註為1234,那麼所有的情況就是以下24種情況。

數字代表箭頭指向:

1234 2134 3124 4321
1243 2143 3142 4312
1342 2314 3214 4123
1324 2341 3241 4132
1423 2413 3412 4213
1432 2431 3421 4231

所有的情況就是以上24種。我們將這24種驗證碼的情況儲存為圖片放在一個資料夾(程式碼中為 templates/)內,檔名即為對應的拖動順序(例如 1234.png)。登入的時候用獲取的驗證碼截圖去和所有的情況一一對比,找到相似度超過閾值的那一張,再按其檔名中的順序進行拖動即可。程式碼如下:

    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.action_chains import ActionChains
    import time
    from PIL import Image
    from io import BytesIO
    from os import listdir
    # Weibo login credentials; both are intentionally left empty here and must
    # be filled in with your own account before running the script.
    USERNAME = PASSWORD = ''
    class CrackWeiboSlide():
      """Log in to the Weibo mobile sign-in page and solve its four-dot pattern captcha.

      The captcha is solved by template matching: a screenshot of the captcha
      element is compared against every image in ``templates/``. The matching
      file's name (for example ``1234.png``) is read digit by digit as the order
      in which the four dots are dragged.
      """

      def __init__(self):
        """Start Chrome and store the login URL, a 20-second explicit wait and the credentials."""
        self.url = 'https://passport.weibo.cn/signin/login'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser,20)
        self.username = USERNAME
        self.password = PASSWORD

      def __del__(self):
        """Shut the browser down when the object is garbage-collected."""
        # __init__ can fail before `browser` is assigned (e.g. the driver is
        # missing); guard so the finalizer does not raise AttributeError.
        browser = getattr(self, 'browser', None)
        if browser is not None:
          # quit() ends the whole WebDriver session, not just the current window.
          browser.quit()

      def open(self):
        """
        Open the login page, type the username and password and submit the form.
        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID,'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID,'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        submit.click()

      def get_position(self):
        """
        Locate the captcha element on the page.

        If the captcha does not appear within the wait timeout, the login is
        submitted again and the captcha is awaited once more.
        :return: tuple ``(top, bottom, left, right)`` built from the element's
                 location and size
        :raises TimeoutException: if the captcha is still absent after the retry
        """
        locator = (By.CLASS_NAME,'patt-shadow')
        try:
          img = self.wait.until(EC.presence_of_element_located(locator))
        except TimeoutException:
          print('未出現驗證碼')
          self.open()
          # Look the element up again after re-submitting; without this `img`
          # would be unbound below.
          img = self.wait.until(EC.presence_of_element_located(locator))
        # Pause before measuring the element.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y']+size['height']
        left = location['x']
        right = location['x']+size['width']
        return (top,bottom,left,right)

      def get_screenshot(self):
        """
        Take a screenshot of the browser window.
        :return: the screenshot as a PIL image
        """
        screentshot = self.browser.get_screenshot_as_png()
        # Wrap the PNG bytes in a file-like object so PIL can decode them.
        screentshot = Image.open(BytesIO(screentshot))
        return screentshot

      def get_image(self,name):
        """
        Crop the captcha out of a page screenshot and save it.
        :param name: file name the cropped captcha is saved under
        :return: the cropped captcha image
        """
        top,bottom,left,right = self.get_position()
        print('驗證碼位置',top,bottom,left,right)
        screenshot = self.get_screenshot()
        # NOTE(review): this assumes element coordinates and screenshot pixels
        # use the same scale; confirm on displays with a scaled pixel ratio.
        # crop() takes a (left, upper, right, lower) box.
        captcha = screenshot.crop((left,top,right,bottom))
        captcha.save(name)
        return captcha

      def detect_image(self,image):
        """
        Find the template that matches the captcha.
        :param image: captcha image to recognise
        :return: drag order as a list of ints taken from the digits of the
                 matching template's file name, or None when nothing matches
        """
        # Folder that holds the template images.
        for template_name in listdir('templates/'):
          stem = template_name.split('.')[0]
          # Only files whose name (before the first dot) is all digits can be
          # turned into a drag order; skip anything else in the folder.
          if not stem.isdigit():
            continue
          print('正在匹配',template_name)
          # The context manager closes the template file once it is compared.
          with Image.open('templates/'+template_name) as template:
            matched = self.same_img(image,template)
          if matched:
            # Turn the file name's digits into the drag order.
            numbers = [int(number) for number in stem]
            print('拖動順序',numbers)
            return numbers
        return None

      @staticmethod
      def _pixels_close(pixel1,pixel2,threshold=60):
        """Return True when the first three channels each differ by less than `threshold`."""
        return all(abs(pixel1[channel]-pixel2[channel]) < threshold for channel in range(3))

      @staticmethod
      def _split_offset(delta,steps):
        """Split `delta` into `steps` integer increments whose sum is exactly `delta`."""
        increments = []
        covered = 0
        for step in range(1,steps+1):
          target = int(round(delta*step/float(steps)))
          increments.append(target-covered)
          covered = target
        return increments

      def is_pixel_equal(self,image1,image2,x,y):
        """
        Decide whether two images have a similar pixel at one position.
        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True when the first three channels each differ by less than 60
        """
        pixel1 = image1.load()[x,y]
        pixel2 = image2.load()[x,y]
        return self._pixels_close(pixel1,pixel2)

      def same_img(self,image,template):
        """
        Decide whether the captcha matches a template.
        :param image: captcha image to recognise
        :param template: template image
        :return: True when more than 99% of the captcha's pixels are similar to
                 the template's pixel at the same position; False otherwise,
                 including when the captcha is empty or the template is smaller
                 than the captcha
        """
        total = image.width*image.height
        # Nothing to compare; also avoids dividing by zero below.
        if total == 0:
          return False
        # A smaller template cannot cover every captcha pixel.
        if template.width < image.width or template.height < image.height:
          return False
        # Similarity threshold.
        threshold = 0.99
        # Load each pixel-access object once instead of once per pixel.
        image_pixels = image.load()
        template_pixels = template.load()
        count = 0
        for x in range(image.width):
          for y in range(image.height):
            if self._pixels_close(image_pixels[x,y],template_pixels[x,y]):
              count += 1
        result = float(count)/total
        if result > threshold:
          print('成功匹配')
          return True
        return False

      def move(self,numbers):
        """
        Drag across the four captcha dots in the given order.
        :param numbers: drag order, a list of four 1-based dot indices
        :return: None
        """
        # The four dots of the pattern. find_elements(By..., ...) replaces the
        # find_elements_by_* helpers that Selenium 4 removed.
        circles = self.browser.find_elements(By.CSS_SELECTOR,'.patt-wrap .patt-circ')
        print('-----------------',circles)
        dx = dy = 0
        for index in range(4):
          circle = circles[numbers[index]-1]
          if index == 0:
            # Press and hold on the first dot. move_to_element() targets the
            # element itself, so it does not depend on which corner or centre
            # a Selenium version measures offsets from.
            ActionChains(self.browser).move_to_element(circle).click_and_hold().perform()
          else:
            # Move gradually in small steps.
            times = 30
            # Whole-pixel steps that add up exactly to the distance, so the
            # pointer lands on the dot even if fractional offsets are truncated.
            steps_x = self._split_offset(dx,times)
            steps_y = self._split_offset(dy,times)
            for step_x,step_y in zip(steps_x,steps_y):
              ActionChains(self.browser).move_by_offset(step_x,step_y).perform()
              time.sleep(1/times)
          if index == 3:
            # Release the mouse button after the last dot.
            ActionChains(self.browser).release().perform()
          else:
            # Offset from this dot to the next one.
            dx = circles[numbers[index+1]-1].location['x'] - circle.location['x']
            dy = circles[numbers[index+1]-1].location['y'] - circle.location['y']

      def crack(self):
        """
        Entry point: log in, capture the captcha, match it and drag the pattern.
        :return: None
        """
        self.open()
        # Capture the captcha image.
        image = self.get_image('captcha.png')
        numbers = self.detect_image(image)
        if numbers is None:
          # No template matched, so there is no order to drag.
          print('未匹配到驗證碼模板')
          return
        self.move(numbers)
        time.sleep(10)
        print('識別結束')
    if __name__ == '__main__':
      # Script entry point: build the cracker and run one login attempt.
      cracker = CrackWeiboSlide()
      cracker.crack()
    
    

將程式碼開頭的 USERNAME 和 PASSWORD 設定為自己的賬號和密碼即可執行。

有時候會匹配不上,圖片相似度閾值達不到0.99以上,這個時候可能是我們收集的驗證碼圖片過時了,重新開啟圖片收集程式,執行收集一下即可。收集程式會把截圖依次儲存為 0.png、1.png……,需要挑出24種不同的情況,按各自的箭頭順序重新命名(例如 1234.png)後放入 templates/ 資料夾,破解程式才能匹配。

收集圖片程式程式碼如下:

    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import TimeoutException
    import time
    from PIL import Image
    from io import BytesIO
    from os import listdir
    # Weibo login credentials. Real account details must not be committed or
    # published with the source; fill these in locally before running.
    USERNAME = ''
    PASSWORD = ''
    class CrackWeiboSlide():
      """Collect captcha screenshots from the Weibo mobile sign-in page.

      Each round submits the login form, waits for the four-dot captcha to
      appear, crops it out of a page screenshot and saves it under a numbered
      file name (``0.png``, ``1.png``, ...).
      """

      def __init__(self):
        """Start Chrome and store the login URL, a 20-second explicit wait and the credentials."""
        self.url = 'https://passport.weibo.cn/signin/login'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser,20)
        self.username = USERNAME
        self.password = PASSWORD

      def __del__(self):
        """Shut the browser down when the object is garbage-collected."""
        # __init__ can fail before `browser` is assigned (e.g. the driver is
        # missing); guard so the finalizer does not raise AttributeError.
        browser = getattr(self, 'browser', None)
        if browser is not None:
          # quit() ends the whole WebDriver session, not just the current window.
          browser.quit()

      def open(self):
        """
        Open the login page, type the username and password and submit the form.
        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID,'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID,'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        submit.click()

      def get_position(self):
        """
        Locate the captcha element on the page.

        If the captcha does not appear within the wait timeout, the login is
        submitted again and the captcha is awaited once more.
        :return: tuple ``(top, bottom, left, right)`` built from the element's
                 location and size
        :raises TimeoutException: if the captcha is still absent after the retry
        """
        locator = (By.CLASS_NAME,'patt-shadow')
        try:
          img = self.wait.until(EC.presence_of_element_located(locator))
        except TimeoutException:
          print('未出現驗證碼')
          self.open()
          # Look the element up again after re-submitting; without this `img`
          # would be unbound below.
          img = self.wait.until(EC.presence_of_element_located(locator))
        # Pause before measuring the element.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y']+size['height']
        left = location['x']
        right = location['x']+size['width']
        return (top,bottom,left,right)

      def get_screenshot(self):
        """
        Take a screenshot of the browser window.
        :return: the screenshot as a PIL image
        """
        screentshot = self.browser.get_screenshot_as_png()
        # Wrap the PNG bytes in a file-like object so PIL can decode them.
        screentshot = Image.open(BytesIO(screentshot))
        return screentshot

      def get_image(self,name):
        """
        Crop the captcha out of a page screenshot and save it.
        :param name: file name the cropped captcha is saved under
        :return: the cropped captcha image
        """
        top,bottom,left,right = self.get_position()
        print('驗證碼位置',top,bottom,left,right)
        screenshot = self.get_screenshot()
        # crop() takes a (left, upper, right, lower) box.
        captcha = screenshot.crop((left,top,right,bottom))
        captcha.save(name)
        return captcha

      def main(self,limit=None):
        """
        Collect captcha images, saving them as ``0.png``, ``1.png``, ...

        :param limit: maximum number of captchas to collect; None (the default)
                      keeps collecting until the process is stopped
        :return: None
        """
        count = 0
        while limit is None or count < limit:
          name = str(count)+'.png'
          # Each login attempt produces a fresh captcha to capture.
          self.open()
          self.get_image(name)
          count += 1
    if __name__ == '__main__':
      # Script entry point: build the collector and start saving captchas.
      collector = CrackWeiboSlide()
      collector.main()
    

總結

以上就是這篇文章的全部內容了,希望本文的內容對大家的學習或者工作具有一定的參考學習價值,謝謝大家對指令碼之家的支援。如果你想了解更多相關內容請檢視下面相關連結