[R] Help with Parallel Processing

Ravi Jeyaraman r@v|76 @end|ng |rom gm@||@com
Wed May 20 19:05:26 CEST 2020


Dear Friends,

 

I'm trying to run a bunch of tasks in parallel using 'Future' package and
for some reason, it's not able to find the data frames that I want it to
find.  I've created the below sample program to show what I'm doing.  Should
I be exporting the Global data to each child process?  I am not doing that
currently because I read somewhere that it's automatically done when using
the multisession plan.  Any idea what I'm doing wrong?  

 

Thanks

Ravi

 

 

if(!require('sqldf')) install.packages('sqldf')

if(!require('future')) install.packages('future')

if(!require('doFuture')) install.packages('doFuture')

if(!require('future.apply')) install.packages('future.apply')

 

library('sqldf')

library('future')

library("doFuture")

library("future.apply")

 

registerDoFuture()

plan(multisession, globals = TRUE, workers=5)

options(future.globals.maxSize=+Inf)

 

DATA_ASIA <- data.frame(c('NAME1', 'NAME2'))

DATA_EUROPE <- data.frame(c('NAME1', 'NAME2', 'NAME3'))

DATA_USA <- data.frame(c('NAME1', 'NAME2', 'NAME3', 'NAME4'))

DATA_AFRICA <- data.frame(c('NAME1'))

 

LEVEL <- c('ASIA_LEVEL', 'EUROPE_LEVEL', 'USA_LEVEL', 'AFRICA_LEVEL')

R_PROG <- c('SELECT COUNT(*) as COUNT FROM DATA_ASIA', 

            'SELECT COUNT(*) as COUNT FROM DATA_EUROPE', 

            'SELECT COUNT(*) as COUNT FROM DATA_USA', 

            'SELECT COUNT(*) as COUNT FROM DATA_AFRICA')

 

RULES_ALL <- data.frame(LEVEL, R_PROG)

 

RULES_ASIA <- subset(RULES_ALL, LEVEL == 'ASIA_LEVEL')

RESULT_ASIA <- future(data.table::rbindlist(lapply(1:nrow(RULES_ASIA),
function(x) sqldf(RULES_ASIA$R_PROG[x])), use.names = TRUE, fill=TRUE))

 

RULES_EUROPE <- subset(RULES_ALL, LEVEL == 'EUROPE_LEVEL')

RESULT_EUROPE <- future(data.table::rbindlist(lapply(1:nrow(RULES_EUROPE),
function(x) sqldf(RULES_EUROPE$R_PROG[x])), use.names = TRUE, fill=TRUE))

 

RULES_USA <- subset(RULES_ALL, LEVEL == 'USA_LEVEL')

RESULT_USA <- future(data.table::rbindlist(lapply(1:nrow(RULES_USA),
function(x) sqldf(RULES_USA$R_PROG[x])), use.names = TRUE, fill=TRUE))

 

RULES_AFRICA <- subset(RULES_ALL, LEVEL == 'AFRICA_LEVEL')

RESULTS_AFRICA <- future(data.table::rbindlist(lapply(1:nrow(RULES_AFRICA),
function(x) sqldf(RULES_AFRICA$R_PROG[x])), use.names = TRUE, fill=TRUE))

 

RESULT_ASIA <- value(RESULT_ASIA)

RESULT_EUROPE <- value(RESULT_EUROPE)

RESULT_USA <- value(RESULT_USA)

RESULTS_AFRICA <- value(RESULTS_AFRICA)

 

 

 



-- 
This email has been checked for viruses by AVG.
https://www.avg.com

	[[alternative HTML version deleted]]



More information about the R-help mailing list