[R] package usage statistics. (UPDATE)
Vladimir Eremeev
wl at eimb.ru
Tue Sep 26 12:07:25 CEST 2006
Here is the Perl script, with some comments:
<pre>
#!/bin/perl -w
use File::Find;
# We use the standard Perl module File::Find.
# Its find() procedure will scan the directory tree and put all package names
# into the hash below, along with a count of how many times each one is loaded.
# Table of package name => load count.
# Packages installed by default start at -1; the report at the end of the
# script only prints entries whose count is greater than zero, so these
# names are not printed.
%pkgs = map { ($_ => -1) } qw(
    base datasets grDevices graphics grid methods
    splines stats stats4 tcltk tools utils MASS
);
# Callback used by File::Find::find(); called once per directory entry.
#
# Scans R source files (*.R, *.r) and R history files (*.Rhistory,
# *.rhistory) for library(...) calls and records every package named there
# in the global %pkgs hash (package name => number of loadings).
#
# Input:   $_ and $File::Find::name, as set by File::Find.
# Returns: nothing.
# Dies:    if a matching file cannot be opened.
sub wanted {
    # Do nothing if this file doesn't contain R commands.
    return if $_ !~ /\.[Rr]$/ && $_ !~ /\.[Rr]history$/;

    my $path = $File::Find::name;

    # A directory may also carry a matching name; only plain files are read.
    return if !-f $path;

    open my $in, '<', $path
        or die("cannot open file $path: $!");

    # Read into a lexical instead of $_: File::Find's documentation asks
    # callbacks to leave $_ as they found it.
    while (my $line = <$in>) {
        # Look for library(pkg), library("pkg") and library('pkg') calls.
        # Only the leading package name is captured, so extra arguments and
        # several calls on one line are handled.
        while ($line =~ /\blibrary\(\s*["']?([A-Za-z0-9.]+)/g) {
            my $pkgname = $1;

            # Don't do anything if the package directory doesn't exist:
            # simple protection against typos.
            next if !-d "C:\\Program Files\\R\\library\\$pkgname";

            if (exists $pkgs{$pkgname}) {
                # Packages installed by default carry a negative count and
                # must stay excluded even when loaded with library().
                $pkgs{$pkgname}++ if $pkgs{$pkgname} > 0;
            }
            else {
                $pkgs{$pkgname} = 1;
            }
        }
    }
    close $in;
    return;
}
# Resolves the direct dependencies of one package.
#
# Argument: a package name.
# Reads the package's DESCRIPTION file from the R library directory and, for
# every package listed in its "Depends:" and "Imports:" fields, either adds
# it to the global %pkgs hash with a count of 1 or increments its existing
# count. Entries whose count is not positive (packages installed by default)
# are left untouched. The dependency on R itself is ignored.
#
# Returns: nothing. Silently does nothing if DESCRIPTION cannot be opened.
sub getdepends {
    my ($pkgname) = @_;

    open my $in, '<', "C:\\Program Files\\R\\library\\$pkgname\\DESCRIPTION"
        or return;

    # A field value may continue on following lines that begin with
    # whitespace, so collect the complete value of each field first.
    my @field_values;
    my $in_field = 0;
    while (my $line = <$in>) {
        if ($line =~ /^(?:Imports|Depends):(.*)/) {
            push @field_values, $1;
            $in_field = 1;
        }
        elsif ($in_field && $line =~ /^[ \t]+(\S.*)/) {
            $field_values[-1] .= " $1";
        }
        else {
            $in_field = 0;
        }
    }
    close $in;

    for my $dep (map { split /,/, $_ } @field_values) {
        $dep =~ s/\s//g;     # "foo (>= 1.0)" becomes "foo(>=1.0)"
        $dep =~ s/\(.*//;    # drop the version requirement, if any

        # Skip empty entries (e.g. from a trailing comma) and the
        # dependency on the R version itself.
        next if $dep eq '' || $dep eq 'R';

        if (exists $pkgs{$dep}) {
            # Don't add basic packages.
            $pkgs{$dep}++ if $pkgs{$dep} > 0;
        }
        else {
            $pkgs{$dep} = 1;
        }
    }
    return;
}
# Main program: scan the current directory tree for R code, resolve the
# dependencies of every package found, then write install.pkgs.r, an R
# script that installs all of them.
print "Searching for R commands...";
# no_chdir keeps the working directory fixed, so the relative path in
# $File::Find::name (which wanted() opens) stays valid.
find({ wanted => \&wanted, no_chdir => 1 }, '.');
print "done!\n";

print "Now resolving dependencies...";
# getdepends() may add new names to %pkgs. Repeat until every package,
# including the ones added along the way, has been processed exactly once,
# so that dependencies of dependencies are picked up too.
my %resolved;
while (my @todo = grep { !$resolved{$_} } keys %pkgs) {
    for my $p (@todo) {
        $resolved{$p} = 1;
        getdepends($p);
    }
}
print "done!\n";

open my $out, '>', 'install.pkgs.r'
    or die("cannot create file install.pkgs.r: $!");

# Only entries with a positive count are written; packages installed by
# default carry a non-positive count. install.packages() takes the package
# names as one character vector, so they are quoted and wrapped in c(...).
my @to_install = map { qq{  "$_"} }
                 sort
                 grep { length($_) && $pkgs{$_} > 0 } keys %pkgs;

# NOTE(review): ask=FALSE is kept from the original script; confirm that
# install.packages() in the targeted R version accepts it.
print {$out} "install.packages(c(\n",
             join(",\n", @to_install),
             "\n), ask=FALSE)\n";

close $out or die("cannot write file install.pkgs.r: $!");
</pre>
More information about the R-help
mailing list