我尝试通过修改脚本在R中进行并行编程。 在我的脚本中,我进行了两个并行编程。 第一个成功了,但第二个产生了错误,尽管脚本结构相同。 以下是我的代码:
library(rvest)
library(RMySQL)
library(curl)
library(gdata)
library(doMC)
library(foreach)
library(doParallel)
library(raster)
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
setwd('/home/chandra/R/IlmuOne/MisterAladin')
no_cores <- detectCores()
cl<-makeCluster(no_cores)
registerDoParallel(cl)
MasterData = read.xls("Master Hotels - FINAL.xlsx", sheet = 1, header = TRUE)
MasterData$url_agoda = as.character(MasterData$url_agoda)
today = as.Date(format(Sys.time(), "%Y-%m-%d"))+2
ntasks <- nrow(MasterData)
#This section perfomed well
foreach(i=1:ntasks) %dopar% {
url = MasterData$url_agoda[i]
if (trim(url)!='-' & trim(url)!='')
{
from = gregexpr(pattern ='=',url)[[1]][1]
piece1 = substr(url,1,from)
from = gregexpr(pattern ='&los=',url)[[1]][1]
piece2 = substr(url,from,nchar(url))
MasterData$url_agoda[i] = paste0(piece1,today,piece2)
}
}
con <- dbConnect(RMySQL::MySQL(), username = "root", password = "master",host = "localhost", dbname = "mister_aladin")
#Tried first 10 data
#Below section was error and always return error: Error in { : task 1 failed - "could not find function "%>%""
foreach(a=1:10, .packages='foreach') %dopar% {
hotel_id = MasterData$id[a]
vendor = 'Agoda'
url = MasterData$url_agoda[a]
if (url!='-')
{
tryCatch({
hotel <- curl(url) %>%
read_html() %>%
html_nodes(xpath='//*[@id="room-grouping"]') %>%
html_table(fill = TRUE)
hotel <- hotel[[1]]
hotel$hotel_id= hotel_id
hotel$vendor= vendor
colnames(hotel)[1] = 'TheSpace'
colnames(hotel)[4] = 'PricePerNight'
room = '-'
hotel$NormalPrice = 0
hotel$FinalPrice = 0
for(i in 1:nrow(hotel))
{
if (i==1 | (!grepl('See photos',hotel$TheSpace[i]) & hotel$TheSpace[i]!='') )
{
room = hotel$TheSpace[i]
}
hotel$TheSpace[i] = room
#Normal Price
if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
{
split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
NormalPrice = trim(split[2])
hotel$NormalPrice[i] = NormalPrice
NormalPrice = as.integer(gsub(",","",NormalPrice))
hotel$NormalPrice[i] = NormalPrice
}
#Final Price
if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
{
split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
FinalPrice = trim(split[6])
hotel$FinalPrice[i] = FinalPrice
FinalPrice = as.integer(gsub(",","",FinalPrice))
hotel$FinalPrice[i] = FinalPrice
}
hotel$NormalPrice[is.na(hotel$NormalPrice)] <- 0
hotel$FinalPrice[is.na(hotel$FinalPrice)] <- 0
}
hotel = hotel[which(hotel$FinalPrice!=0),c("TheSpace","NormalPrice","FinalPrice")]
colnames(hotel) = c('room','normal_price','final_price')
hotel$log = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
hotel$hotel_id = hotel_id
hotel$vendor = vendor
Push = hotel[,c('hotel_id','room','normal_price','final_price','vendor','log')]
#print(paste0('Agoda: push one record, hotel id ',hotel_id,'!'))
#cat(paste(paste0('Agoda: push one record, hotel id ',hotel_id,'!'),'\n'))
dbWriteTable(conn=con,name='prices_',value=as.data.frame(Push), append = TRUE, row.names = F)
},
error = function(e) {
Sys.sleep(2)
e
})
}
}
dbDisconnect(con)
stopImplicitCluster()
每次运行脚本时,总是会出现错误:Error in { : task 1 failed - "could not find function "%>%""。
我已经检查了论坛上的每篇文章并尝试应用它们,但都没有起作用。
请给出任何解决方案。
library(magrittr)
库 - mpjdem