# Mandelbrot escape-time demo: compare a single-core mapply() with the
# multicore equivalents from the parallel package.
# (Removed rm(list=ls()) — clearing the caller's workspace is a
# side effect a script should never have.)
library(parallel)  # library() errors if missing; require() only warns

# Escape time for the Mandelbrot iteration starting at z0 = x + yi:
#   z_{k+1} = z_k^2 + z0
# Returns the first iteration index k at which |z_k| > 2 (the point has
# provably escaped), or `limit` if it never escapes within `limit` steps.
# e.g. the algorithm is easy to see with z0 = -1 + 1i: calculate and
# draw z0, z1, z2. z2 is outside the circle, so the escape time is 2.
escape.time <- function(x, y, limit = 256) {
  z0 <- complex(real = x, imaginary = y)
  z <- z0
  for (i in 0:limit) {
    if (abs(z) > 2) {  # abs() of a complex number is its modulus
      break
    }
    z <- z^2 + z0
  }
  i  # loop index survives the loop; equals `limit` when no escape
}

n <- 400  # grid resolution: n x n sample points

# Set up (x, y) pairs in the region defined by -2 <= x, y <= 2.
interval <- seq(from = -2, to = 2, length.out = n)
x <- rep(x = interval, each = n)
y <- rep(x = interval, times = n)

print("Timing mapply(), one core ...")
# Find escape time of all (x, y) pairs. The next line uses only one core.
print(system.time(v <- mapply(FUN = escape.time, x, y)))

# Use several cores, if available.
n.cores <- detectCores()
if (.Platform$OS.type == "windows") {
  cluster <- makePSOCKcluster(names = n.cores)
  cat(sep = "", "Timing clusterMap(), ", n.cores, " cores ...")
  # This is slower than no multicore on my office Windows computer.
  # I don't know why. I'm puzzled because parLapply(), in the nfl.R
  # example, was faster than no multicore.
  print(system.time(v <- clusterMap(cl = cluster, fun = escape.time, x, y)))
  stopCluster(cluster)
} else {
  cat(sep = "", "Timing mcmapply(), ", n.cores, " cores ...")
  print(system.time(v <- mcmapply(FUN = escape.time, x, y,
                                  mc.cores = n.cores)))
}

# m = matrix(data=unlist(v), nrow=n, ncol=n, byrow=TRUE)
# image(x=interval, y=interval, z=m, col=rainbow(256))

# ----------------------------------------------------------------------
# Ok, let's try to do better on Windows. I'm concerned that the
# parallel package didn't do a smart job of allocating jobs to
# processors (did it create 400^2 = 160000 jobs?), and that that's
# what killed performance on my Windows computer. A knowledgeable
# student suggested, instead, that it's communication between the
# original process and four new processes that is killing
# performance.
# Here I'll try partitioning the data into n.cores parts, and then
# shipping each part to a processor, hoping to minimize the overhead
# of job allocation.

# Return the escape times for the ith of n.cores parts of the ordered
# pairs represented in vectors xx and y. (Parameter name "xx" instead
# of "x" because parLapply() and its relatives use a parameter "x",
# which prevents passing "x=x" as an additional parameter for "..."
# in the parLapply() call, below.)
# (JG: clusterSplit() can do some of this work for me.)
escape.time.ith.part <- function(i, n.cores, xx, y) {
  n <- length(xx)
  stopifnot(n == length(y))
  # Part i covers indices (ends[i] + 1) : ends[i + 1]; floor() keeps
  # the parts within one element of each other in size.
  ends <- c(0, floor((1:n.cores) * n / n.cores))
  start <- ends[i] + 1
  end <- ends[i + 1]
  mapply(FUN = escape.time, x = xx[start:end], y = y[start:end])
}

if (.Platform$OS.type == "windows") {
  cluster <- makePSOCKcluster(names = n.cores)
  # escape.time() is missing on the child processes unless we ship it.
  # clusterExport() sends just the function object; sourcing the whole
  # script file on each worker (the previous approach) would re-run
  # the timing code there too.
  clusterExport(cl = cluster, varlist = "escape.time")
  cat(sep = "", "Timing parLapply(), ", n.cores, " cores ...")
  print(system.time(v <- parLapply(cl = cluster, X = 1:n.cores,
                                   fun = escape.time.ith.part,
                                   n.cores = n.cores, xx = x, y = y)))
  stopCluster(cluster)
} else {
  cat(sep = "", "Timing mclapply(), ", n.cores, " cores ...")
  print(system.time(v <- mclapply(X = 1:n.cores,
                                  FUN = escape.time.ith.part,
                                  mc.cores = n.cores,
                                  n.cores = n.cores, xx = x, y = y)))
}
# Now we got an improvement on Windows.