另一种方法是选择tbody行,通过属性过滤掉隐藏的项目,然后稍后添加标题。
library(rvest)
library(tidyverse)
rows <- read_html("https://www.immd.gov.hk/eng/stat_20220901.html") %>% html_elements(".table-passengerTrafficStat tbody tr")
prefixes <- c("arr", "dep")
cols <- c("Hong Kong Residents", "Mainland Visitors", "Other Visitors", "Total")
headers <- c("Control_Point", crossing(prefixes, cols) %>% unite("headers", 1:2, remove = T) %>% unlist() %>% unname())
df <- map_dfr(
rows,
function(x) {
x %>%
html_elements("td[headers]") %>%
set_names(headers) %>%
html_text()
}
) %>%
mutate(across(c(-1), ~ str_replace(.x, ",", "") %>% as.integer()))
或者略微简化,
library(rvest)
library(tidyverse)
rows <- read_html("https://www.immd.gov.hk/eng/stat_20220901.html") %>% html_elements(".table-passengerTrafficStat tbody tr")
prefixes <- c("arr", "dep")
cols <- c("Hong Kong Residents", "Mainland Visitors", "Other Visitors", "Total")
headers <- c("Control_Point", crossing(prefixes, cols) %>% unite("headers", 1:2, remove = T) %>% unlist() %>% unname())
df <- map_dfr(rows, ~ set_names(.x %>% html_elements("td[headers]") %>% html_text(), headers)) %>%
mutate(across(c(-1), ~ str_replace(.x, ",", "") %>% as.integer()))
class='hRight'
,而对于标题,则是headers='Arrival'
和Departure'
,然后是headers='Control_Point'
(行名称)。 - Rui Barradas