在我的代码中，我使用 Firefox 浏览器打开一个网站，执行搜索，然后逐个点击查看每个 PDF 文档。进入文档后（它会直接加载，而不会提示我下载），我想把这个文档下载下来。我找到了用 Java 实现这一点的方法，但在 Python 中似乎并不容易做到。我还尝试过从 Adobe 框架（frame）中选取元素（以便直接下载），但同样无法通过 XPath 定位到该元素。
import time

import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Script: open the OKCC records site, search for "MTG" documents recorded
# between 4/1/2020 and 4/20/2020, then open each PDF result and download it.

# --- Browser configuration --------------------------------------------------
# Firefox only reads these preferences at start-up, so the options object must
# be completely built BEFORE the driver is created and must be passed to it.
DOWNLOAD_DIR = "/Users/username/Desktop/oklahoma/oklahoma_county"
PDF_ICON_SELECTOR = ".icon.pdf-icon"

options = webdriver.FirefoxOptions()
# 2 = save downloads to the custom folder named in browser.download.dir.
options.set_preference("browser.download.folderList", 2)
options.set_preference("browser.download.dir", DOWNLOAD_DIR)
# Boolean preferences must be real booleans, not the string "true".
options.set_preference("browser.download.useDownloadDir", True)
# Save PDFs straight to disk without showing the "open/save" dialog.
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
# Disable the built-in PDF viewer so PDFs are downloaded instead of rendered.
options.set_preference("pdfjs.disabled", True)

# "geckodriver" is resolved from PATH by default, so no explicit
# executable_path argument is needed.
driver = webdriver.Firefox(options=options)
driver.get("https://www.okcc.online/")
driver.maximize_window()

wait = WebDriverWait(driver, 10)

# --- Fill in and submit the search form --------------------------------------
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='rod-menu-button']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='rodDocTypeTxt']"))).send_keys('MTG')
# Pick the first entry of the autocomplete list that appears after typing.
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@id='ui-id-1']//li//div"))).click()

wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="rod-date-toggle"]'))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="rodFromDateTxt"]'))).send_keys('4/1/2020')
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="rodToDateTxt"]'))).send_keys('4/20/2020')
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="rod-submit-search"]'))).click()

# --- Download every PDF in the result list -----------------------------------
# Wait for the first result icon instead of sleeping a fixed time. A search
# with no results simply yields an empty list, as find_elements() would.
try:
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, PDF_ICON_SELECTOR)))
except TimeoutException:
    pdf_icons = []
else:
    pdf_icons = driver.find_elements(By.CSS_SELECTOR, PDF_ICON_SELECTOR)

for icon in pdf_icons:
    icon.click()
    # NOTE(review): if the document viewer is rendered inside an <iframe>,
    # driver.switch_to.frame(...) is required before "#download" can be
    # located (and driver.switch_to.default_content() afterwards) - confirm
    # against the live page.
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="download"]'))).click()
    wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.pdf-function-button.pdf-close'))
    ).click()

# The browser is left open on purpose: quitting right away could abort
# downloads that are still in progress. Call driver.quit() once they finish.