使用Selenium解决验证码问题

Question

使用Selenium解决验证码问题

python selenium selenium-webdriver captcha simplecaptcha

4

我的代码识别出来的验证码总是与页面上显示的验证码不一致！请帮我指出错误，因为我不知道是什么原因导致了这个问题！

from selenium import webdriver
from python3_anticaptcha import ImageToTextTask, CallbackClient
import time
import requests

# Script from the question: fill in a captcha-protected login form by
# downloading the captcha image and sending it to the anti-captcha service.
browser = webdriver.Firefox()

url = 'https://urlmased.com/'
browser.get(url)
# Fixed pause after navigation before the form fields are looked up.
time.sleep(10)
username = browser.find_element_by_id("masked")
username.send_keys("testuser")
password = browser.find_element_by_id("masked")
password.send_keys("testpass")

# Read the ``src`` attribute of the element that holds the captcha image ...
image_link = browser.find_element_by_xpath(
    '//*[@id="masked"]').get_attribute('src')
# ... and fetch it with a standalone HTTP request. No cookies or headers from
# the Selenium browser are passed to this call.
# NOTE(review): presumably the server renders a fresh image for this request,
# which would explain solving a different captcha than the one shown - confirm.
pic = requests.get(image_link)
# The file is only (re)written on HTTP 200; on any other status the solver
# below still runs against whatever "image.png" already exists on disk.
if pic.status_code == 200:
    with open("image.png", 'wb') as f:
        f.write(pic.content)
ANTICAPTCHA_KEY = 'masked'
captcha_file = "image.png"
# Submit the saved image for recognition; the recognised text is read from
# result['solution']['text'] further down.
result = ImageToTextTask.ImageToTextTask(
    anticaptcha_key=ANTICAPTCHA_KEY).captcha_handler(captcha_file=captcha_file)

captcha = browser.find_element_by_id("masked")
captcha.send_keys(result['solution']['text'])
# The login button is located here but never clicked in this snippet.
login = browser.find_element_by_id("yw2")

请注意，API 密钥目前处于激活状态，在找到解决方案之前您可以一直使用它，之后我会更换密钥。另外，该服务识别验证码的准确率为100％。

- Anna Plym

评论不适合进行长时间的讨论；此对话已被移至聊天室。 - Samuel Liew

3个回答

0

使用验证码截图而不是从链接获取图像应该可以解决这个问题。

from selenium import webdriver
from python3_anticaptcha import ImageToTextTask
import time

# Solve the captcha from a screenshot of the element as the browser rendered
# it, instead of downloading the image URL a second time.
ANTICAPTCHA_KEY = 'masked'
captcha_file = "image.png"
url = 'https://urlmased.com/'

browser = webdriver.Firefox()
browser.get(url)
time.sleep(10)

# Fill in the credentials.
username = browser.find_element_by_id("masked")
username.send_keys("testuser")
password = browser.find_element_by_id("masked")
password.send_keys("testpass")

# Grab the captcha element's PNG bytes straight from the browser and save them.
captcha = browser.find_element_by_xpath('//*[@id="masked"]')
captcha_image = captcha.screenshot_as_png
with open(captcha_file, 'wb') as f:
    f.write(captcha_image)

# Send the saved screenshot to the recognition service.
solver = ImageToTextTask.ImageToTextTask(anticaptcha_key=ANTICAPTCHA_KEY)
result = solver.captcha_handler(captcha_file=captcha_file)

# Type the recognised text into the captcha input field.
captcha = browser.find_element_by_id("masked")
captcha.send_keys(result['solution']['text'])
login = browser.find_element_by_id("yw2")

- kayak

0

正如@pguradiario所提到的，requests.Session并没有起任何作用。

from selenium import webdriver
from python3_anticaptcha import ImageToTextTask
import time

from urllib.parse import urljoin

# Log in by handing the captcha image URL (rather than a saved file) to the
# recognition service.
browser = webdriver.Chrome()

url = 'https://masked/'
browser.get(url)
time.sleep(10)
username = browser.find_element_by_id("masked")
username.send_keys("testuser")
password = browser.find_element_by_id("masked")
password.send_keys("testpass")

# Resolve the captcha ``src`` against the page URL. urljoin leaves an
# already-absolute ``src`` untouched and correctly joins a relative or
# root-relative one; plain ``url + src`` yields a malformed link in the
# absolute and root-relative cases.
captcha_src = browser.find_element_by_xpath('//*[@id="masked"]').get_attribute('src')
image_link = urljoin(url, captcha_src)
print(image_link)

ANTICAPTCHA_KEY = "masked"
user_answer = ImageToTextTask.ImageToTextTask(anticaptcha_key=ANTICAPTCHA_KEY).\
    captcha_handler(captcha_link=image_link)

# Type the recognised text into the captcha input field.
captcha = browser.find_element_by_id("masked")
captcha.send_keys(user_answer['solution']['text'])
login = browser.find_element_by_id("masked")

- Harish Vutukuri

'src' 给出了绝对URL，需要与基本URL连接。我正在获取页面上的验证码并将其发送给captcha_handler。 - Harish Vutukuri

网页内容由 Stack Overflow 提供，点击上面的原文链接可以查看英文原文。

- Ahmed Soliman · Accepted Answer

问题在于页面源代码中的验证码URL不是实际的图像URL。它是用来动态生成验证码图像的脚本，因此当您使用验证码解算器API时，您正在解决与浏览器加载的图片不同的另一张图片。要解决这个问题，我们需要保存浏览器加载的相同图像。我追踪了图片请求并发现它使用了在浏览器加载页面时生成的唯一cookie。

使用Selenium：

from selenium import webdriver
from python3_anticaptcha import ImageToTextTask, CallbackClient
from time import sleep
import requests



def GetImageCookies():
    """Build the ``Cookie`` header value used to fetch the captcha image.

    Reads the cookies of the module-level ``browser`` and keeps only the
    ``ssc`` and ``ghsdfkjlksssalk35bbr`` entries, each rendered as
    ``name=value;`` in the order the browser reports them.

    Returns:
        str: The concatenated pairs, or an empty string when neither cookie
        is present.
    """
    print('Extracting Browser Cookies')
    wanted = ('ssc', 'ghsdfkjlksssalk35bbr')
    return ''.join(
        '{}={};'.format(entry['name'], entry['value'])
        for entry in browser.get_cookies()
        if entry['name'] in wanted
    )

def SaveImage(captcha_file = "master.jpg"):
    """Download the captcha image and write it to ``captcha_file``.

    The request carries the cookie string returned by ``GetImageCookies()``
    together with browser-like headers. The file is written only when the
    response status is 200; for any other status nothing is saved and no
    error is raised.

    Args:
        captcha_file: Path the image bytes are written to.
    """
    print('Saving the captcha image')
    cookie_header = GetImageCookies()
    request_headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Cookie': cookie_header,
        'Host': 'masked',
        'Referer': 'masked',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    }

    # NOTE: verify=False turns off TLS certificate verification for this call.
    response = requests.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=request_headers,
    )
    if response.status_code != 200:
        return
    with open(captcha_file, 'wb') as image_out:
        image_out.write(response.content)

def SolveCapcha(captcha_file = "master.jpg"):
    """Send a saved captcha image to the recognition service.

    Args:
        captcha_file: Path of the image file to submit.

    Returns:
        The value found at ``['solution']['text']`` in the service response.
    """
    print('Solving the captcha image')
    api_key = 'masked'
    solver = ImageToTextTask.ImageToTextTask(anticaptcha_key=api_key)
    response = solver.captcha_handler(captcha_file=captcha_file)
    answer = response['solution']['text']
    print('Captcha text is :', answer)
    return answer


# Start a Firefox session and open the login page. GetImageCookies() and
# Login() in this snippet read this module-level ``browser``.
browser = webdriver.Firefox()
url = 'https://masked/'
browser.get(url)
def Login():
    """Make one login attempt and report whether the captcha was accepted.

    Saves the current captcha image, fills in the username, password and
    solved captcha text, clicks the submit element, then inspects the page
    for an error element.

    Returns:
        bool: ``False`` when the first error element reads
        'The verification code is incorrect.', ``True`` otherwise.
    """
    SaveImage()
    sleep(5)

    user_field = browser.find_element_by_id("masked_username")
    user_field.clear()
    user_field.send_keys("testuser")

    pass_field = browser.find_element_by_id("masked")
    pass_field.clear()
    pass_field.send_keys("testpass")

    # Clear the field first, then solve, so the call order matches the flow
    # of clearing -> solving -> typing.
    captcha_field = browser.find_element_by_id("masked")
    captcha_field.clear()
    captcha_field.send_keys(SolveCapcha())

    browser.find_element_by_id("masked").click()
    sleep(5)

    # find_elements_* returns an empty list when no error element exists.
    errors = browser.find_elements_by_id('masked')
    if errors and errors[0].text == 'The verification code is incorrect.':
        print(errors[0].text)
        return False
    return True


# The captcha image is downloaded using the browser's cookies. Each download
# with the same cookies contains the same text but renders it differently,
# and some renderings are too hard to solve, so keep retrying until one
# attempt is accepted.
#
# Login() is called exactly once per iteration: calling it again inside the
# loop body would throw away that attempt's result and, after a successful
# attempt, trigger yet another login from the loop condition.
while not Login():
    pass

使用Requests和Beautiful Soup:

以下是思路，不确定是否可行，需要自行测试：

from bs4 import BeautifulSoup
def SaveImage(captcha_file = "master.jpg"):
    """Download the captcha image through the shared ``session``.

    Uses the module-level ``session`` for the request, sending browser-like
    headers. The file is written only when the response status is 200; for
    any other status nothing is saved and no error is raised.

    Args:
        captcha_file: Path the image bytes are written to.
    """
    print('Saving the captcha image')
    request_headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Host': 'masked',
        'Referer': 'https://masked/',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    }

    # NOTE: verify=False turns off TLS certificate verification for this call.
    response = session.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=request_headers,
    )
    if response.status_code != 200:
        return
    with open(captcha_file, 'wb') as image_out:
        image_out.write(response.content)

# Browser-less variant of the login flow: every request goes through one
# requests.Session, which SaveImage() above also uses via the name ``session``.
# NOTE(review): this snippet also relies on ``requests`` and ``SolveCapcha``
# being available from the earlier snippet - neither is defined here.
with requests.Session() as session:
    source      = session.get(url = 'https://masked/',verify=False) # To get the initial cookies
    soup        = BeautifulSoup(source.text, 'html.parser')  
    # Pull the hidden form token out of the login page markup.
    token       = soup.find('input', {'name': 'masked'}).get('value')
    SaveImage()
    captcha_text = SolveCapcha()
    # NOTE(review): the first and last keys below are both the placeholder
    # "masked"; in a dict literal the later entry wins, so as written the
    # token is replaced by ''. Substitute the real, distinct field names.
    post_data={"masked": token,
                'masked[username]': 'testuser',
                'masked[password]': 'testpass',
                'masked[captcha]': captcha_text,
                'masked':''}
    # Submit the login form; the response is not inspected here.
    session.post('https://masked/', data=post_data,verify=False)