如何使用selenium
和firefox
来爬取网站?
安装Firefox、xvfb和selenium
echo "deb http://packages.linuxmint.com debian import" >> /etc/apt/sources.list && apt-get update
apt-get install firefox xvfb python-dev python-pip
pip install pyvirtualdisplay selenium
selenium_scrape.py
from pyvirtualdisplay import Display
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
display = Display(visible=0, size=(800, 600))
display.start()
def init_driver():
driver = webdriver.Firefox()
driver.wait = WebDriverWait(driver, 5)
return driver
def lookup(driver, query):
driver.get("http://www.google.com")
try:
box = driver.wait.until(EC.presence_of_element_located(
(By.NAME, "q")))
button = driver.wait.until(EC.element_to_be_clickable(
(By.NAME, "btnK")))
box.send_keys(query)
button.click()
except TimeoutException:
print("Box or Button not found in google.com")
if __name__ == "__main__":
driver = init_driver()
lookup(driver, "Selenium")
time.sleep(5)
driver.quit()
display.stop()
错误
File "selenium_scrape.py", line 20
box = driver.wait.until(EC.presence_of_element_located(
^
IndentationError: expected an indented block