[R] Kosaraju's SCC Algorithm Running
Megan
megan.fight at gmail.com
Mon Nov 7 05:12:38 CET 2016
To whom it may concerns,
We encountered stack overflow issues when we implemented DFS(depth first search) algorithm on a directed graph having 800,000+ vertices and millions of edges. The purpose of running DFS is to use Kosaraju Algorithm to calculate the size of SCC(strongly connected componment) in the graph. This is an assignment from Coursea.org <http://coursea.org/>. We know the maximum size of SCC in the graph is 434,821, which means the maximum recursion depth during code running is 434,821. We know it is a large calculation behind the scene, therefore, we’ve done the below pre-setup hopefully to solve the stack overflow issue. However, we have not got the luck yet. We’ve tried running on both R and RStudio.
We would really appreciate if someone in your team can help to investigate. We can’t wait to see if our code is working in R.
System Information:
Model Name: MacBook Pro
Model Identifier: MacBookPro11,1
Processor Name: Intel Core i5
Processor Speed: 2.4 GHz
Number of Processors: 1
Total Number of Cores: 2
L2 Cache (per Core): 256 KB
L3 Cache: 3 MB
Memory: 8 GB
System settings we have tried:
1. ulimit- s 65000 (to increase stack size before starting up R)
2. R --max-ppsize =500000 (start R with max ppsize)
3. options(expression=500000)
The data we used can be found on
https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A <https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A>
Data discription provided as follow:
https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4 <https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4>
Below is our code:
options(expressions=500000)
#Prepare input test file
g=read.table("Downloads/scc.txt")
x<<-as.matrix(g)
remove(g)
vector.is.empty<<-function(x) {return(length(x)==0)}
'%!in%' <- function(x,y)!('%in%'(x,y))
#g<-c(2,1,3,1,3,4,4,2,5,4,5,6,6,7,7,8,8,9,2,3)
#x<<-matrix(g,nrow=10,ncol=2, byrow=TRUE)
#g<-c(1,4,2,8,3,6,4,7,5,2,6,9,7,1,8,5,8,6,9,3,9,7,10,2)
#x<<-matrix(g,nrow=12,ncol=2, byrow=TRUE)
#g<-c(1,2,2,3,2,4,3,4,3,5,4,1,4,13,5,6,6,7,7,8,8,9,9,6,10,9,10,11,11,8,11,12,12,13,12,14,13,10,14,15)
#x<<-matrix(g,20,2,byrow=TRUE)
u1<-unique(x[,1])
u2<-unique(x[,2])
u<-c(u1,u2)
n<<-length(unique(u))
remove(u1,u2,u)
G <<- vector("list", length=n)
G_REV <<- vector("list", length=n)
P = numeric(n)
FT = numeric(n)
for (i in 1:nrow(x)) {
a = x[i,1]
b = x[i,2]
#for G
if (is.null(G[[a]])) {
G[[a]] = c(b)
} else {
G[[a]] = c(G[[a]], b)
}
if (is.null(G[[b]])) {
G[[b]] = numeric()
}
#for G_VEV
if (is.null(G_REV[[b]])) {
G_REV[[b]] = c(a)
} else {
G_REV[[b]] = c(G_REV[[b]], a)
}
if (is.null(G_REV[[a]])) {
G_REV[[a]] = numeric()
}
}
G_TEMP <<- G_REV
#G_REV<<-x[,c(2,1)]
#P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex is explored or not
#colnames(P)<-c("node","f","parent_vertex")
explore<<-numeric()
assign("time",0,envir=globalenv())
#Algorithm -DFS
DFS_LOOP<<-function(G){
counter = n
for(i in n : 1){
if (i < counter) {
counter = counter - 1000;
print(i);
}
if (i %in% explore){next}
else {
DFS(i)
}
}
}
DFS<<-function(i){
#if(time>=n){return(p)}
#else{
#print(c("i=",i))
explore<<-c(explore,i)
#print(c("explore",explore))
#P[i,2] <<- 1 # gray
v=G_TEMP[[i]]
#print(c("v=",v))
if (vector.is.empty(v) ==TRUE){
len=1
v=i
}
if(vector.is.empty(v)==FALSE){
len=length(v)
}
for(j in 1: len){
if(v[j] %!in% explore){
DFS(v[j])
#P[v[j],3] <<-i
P[v[j]] <<- i
# print(c("child",j,"parent",i))
}
}
time<<-time + 1
FT[i] <<- time
#P[i,2] <<- time
#P[i,2] <<- 2 #black
# } <<-else
}
print('Starting DFS_loop on G_REV')
DFS_LOOP(G_REV)
###################################################
#temp0<-matrix(1:n,n,1)
# temp1<-P[,c(1,2)]
# colnames(temp1)<-c("before","after")
# temp1<-as.data.frame(temp1)
# colnames(x)<-c("vertex","edge")
# X<-as.data.frame(x)
# X_NEW<-merge(x=X,y=temp1,by.x="vertex",by.y="before")
# remove(X)
# names(X_NEW)[names(X_NEW)=="after"]<-"vertex_new"
# X_NEW2<-merge(x=X_NEW,y=temp1,by.x="edge",by.y="before")
# remove(X_NEW,temp1)
# names(X_NEW2)[names(X_NEW2)=="after"]<-"edge_new"
# G2<-as.matrix(X_NEW2)
# remove(X_NEW2)
# G2<-G2[,c(3,4)]
# u1<-unique(G2[,1])
# u2<-unique(G2[,2])
# u<-c(u1,u2)
# n<<-length(unique(u))
# remove(u1,u2,u)
FT_SORTED = numeric(n)
for (i in length(FT):1) {
finish_time = FT[i]
FT_SORTED[finish_time] = i
}
P = numeric(n)
FT = numeric(n)
#P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex is explored or not
#colnames(P)<-c("vertex","f","parent_vertex")
explore<<-numeric()
assign("time",0,envir=globalenv())
print('Starting DFS_loop on G')
#DFS_LOOP(G2)#2nd DFS
explore<<-numeric()
G_TEMP <<- G
for (i in length(FT_SORTED):1) {
k = FT_SORTED[i]
if (k %!in% explore) {
DFS(k)
}
}
mscc_temp = which(P==0)
scc_temp=FT[mscc_temp]
#scc_temp<-P[P[,3]==0,2]
scc_temp=sort(scc_temp,decreasing=TRUE)
m=length(scc_temp)
scc=numeric()
for (i in 1:(m-1)){
scc[i]=scc_temp[i]-scc_temp[i+1]
}
scc[m]<-scc_temp[m]
scc_top_5<-tail(sort(scc),5)
Thanks,
Jing
[[alternative HTML version deleted]]
More information about the R-help
mailing list