R并行编程:错误在于{:任务1失败 -“找不到函数”%>%“

5

我尝试通过修改脚本在R中进行并行编程。 在我的脚本中,我进行了两个并行编程。 第一个成功了,但第二个产生了错误,尽管脚本结构相同。 以下是我的代码:

library(rvest)
library(RMySQL)
library(curl)
library(gdata)
library(doMC)
library(foreach)
library(doParallel)
library(raster)


trim <- function (x) gsub("^\\s+|\\s+$", "", x)

setwd('/home/chandra/R/IlmuOne/MisterAladin')

no_cores <- detectCores() 
cl<-makeCluster(no_cores)
registerDoParallel(cl)

MasterData = read.xls("Master Hotels - FINAL.xlsx", sheet = 1, header = TRUE)
MasterData$url_agoda = as.character(MasterData$url_agoda)

today = as.Date(format(Sys.time(), "%Y-%m-%d"))+2

ntasks <- nrow(MasterData)

#This section perfomed well
foreach(i=1:ntasks) %dopar% {    
  url = MasterData$url_agoda[i]        
  if (trim(url)!='-' & trim(url)!='')
  {
    from = gregexpr(pattern ='=',url)[[1]][1]
    piece1 = substr(url,1,from)
    from = gregexpr(pattern ='&los=',url)[[1]][1]
    piece2 = substr(url,from,nchar(url))
    MasterData$url_agoda[i] = paste0(piece1,today,piece2)  
  }   
}

con <-  dbConnect(RMySQL::MySQL(), username = "root", password = "master",host = "localhost", dbname = "mister_aladin")



#Tried first 10 data
#Below section was error and always return error: Error in { : task 1 failed - "could not find function "%>%""
foreach(a=1:10, .packages='foreach') %dopar% {


  hotel_id = MasterData$id[a]
  vendor = 'Agoda'
  url = MasterData$url_agoda[a]    

  if (url!='-')
  {

    tryCatch({                  
      hotel <- curl(url) %>%
        read_html() %>%        
        html_nodes(xpath='//*[@id="room-grouping"]') %>%
        html_table(fill = TRUE)
      hotel <- hotel[[1]]

      hotel$hotel_id= hotel_id
      hotel$vendor= vendor

      colnames(hotel)[1] = 'TheSpace'
      colnames(hotel)[4] = 'PricePerNight'

      room = '-'
      hotel$NormalPrice = 0
      hotel$FinalPrice = 0

      for(i in 1:nrow(hotel))
      {

        if (i==1 | (!grepl('See photos',hotel$TheSpace[i]) & hotel$TheSpace[i]!='') )  
        {
          room = hotel$TheSpace[i]
        }
        hotel$TheSpace[i] = room

        #Normal Price
        if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
        {
          split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
          NormalPrice = trim(split[2])
          hotel$NormalPrice[i] = NormalPrice            
          NormalPrice = as.integer(gsub(",","",NormalPrice))
          hotel$NormalPrice[i] = NormalPrice          
        }        

        #Final Price
        if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
        {
          split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
          FinalPrice = trim(split[6])
          hotel$FinalPrice[i] = FinalPrice
          FinalPrice = as.integer(gsub(",","",FinalPrice))
          hotel$FinalPrice[i] = FinalPrice
        }

        hotel$NormalPrice[is.na(hotel$NormalPrice)] <- 0
        hotel$FinalPrice[is.na(hotel$FinalPrice)] <- 0

      }

      hotel = hotel[which(hotel$FinalPrice!=0),c("TheSpace","NormalPrice","FinalPrice")]
      colnames(hotel) = c('room','normal_price','final_price')

      hotel$log = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
      hotel$hotel_id = hotel_id
      hotel$vendor = vendor


      Push = hotel[,c('hotel_id','room','normal_price','final_price','vendor','log')]

      #print(paste0('Agoda: push one record, hotel id ',hotel_id,'!'))
      #cat(paste(paste0('Agoda: push one record, hotel id ',hotel_id,'!'),'\n'))      
      dbWriteTable(conn=con,name='prices_',value=as.data.frame(Push), append = TRUE, row.names = F)


    },
    error = function(e) {
      Sys.sleep(2)
      e
    })

  }

}

dbDisconnect(con)
stopImplicitCluster()

每次运行脚本时,总是会出现错误:Error in { : task 1 failed - "could not find function "%>%""。
我已经检查了论坛上的每篇文章并尝试应用它们,但都没有起作用。
请给出任何解决方案。

2
library(magrittr) - mpjdem
非常感谢。它起作用了。 - chandra sutrisno
2个回答

10

你需要使用.packages = c("magrittr", ...)并包括所有必需的软件包才能在foreach循环内运行代码。然而,.packages = "foreach"并没有起到帮助作用。

可以想象,你在.packages中定义的所有软件包都会被转发/加载到每个并行工作者中。


5

%>% 运算符需要使用 magrittr 包。然而,在这种情况下,仅在脚本开头加载它是不够的 - 它需要为每个节点加载。您可以在创建集群时添加此行来实现:

cl<-makeCluster(no_cores)
registerDoParallel(cl)
clusterCall(cl, function() library(magrittr))

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接