[R] package usage statistics. (UPDATE)

Vladimir Eremeev wl at eimb.ru
Tue Sep 26 12:07:25 CEST 2006


Here is the Perl script, with some comments:

<pre>
#!/bin/perl -w

use File::Find;
# We use the standard File::Find module.
# Its find() procedure scans the directory tree and calls our callback, which
# puts every package name into the hash below, along with a count of how many
# times the package is loaded.

# Packages installed by default start with a count of -1, so that they
# won't be printed in the generated install list.
%pkgs = map { $_ => -1 } qw(
  base      datasets  grDevices  graphics
  grid      methods   splines    stats
  stats4    tcltk     tools      utils
  MASS
);

sub wanted {
  # Callback for File::Find::find(): scans one file for library(...) calls
  # and counts every installed package it loads in the global %pkgs hash.
  #
  # Inputs (set by File::Find before each call):
  #   $_                 - name of the entry being visited
  #   $File::Find::name  - path used to open the entry
  # Side effects: increments/creates entries in %pkgs.
  # Dies if a matching regular file cannot be opened.

  # Do nothing unless the name looks like an R script (*.R, *.r) or an R
  # history file (*.Rhistory, *.rhistory).
  return unless /\.[Rr](?:history)?$/;

  # A directory may carry a matching name too; only plain files hold commands.
  return unless -f $File::Find::name;

  open(my $in, '<', $File::Find::name)
    or die("cannot open file $File::Find::name: $!");

  while (my $line = <$in>) {
    # Pick up every library(...) call on the line.  The package name may be
    # bare or quoted and may be followed by further arguments, e.g.
    #   library(MASS); library("nlme"); library(lattice, quietly=TRUE)
    while ($line =~ /\blibrary\(\s*["']?([A-Za-z][A-Za-z0-9.]*)["']?\s*[,)]/g) {
      my $pkg = $1;

      # Simple protection against typos: ignore names that have no package
      # directory in the R library.
      next if !-d "C:\\Program Files\\R\\library\\$pkg";

      # Packages installed by default carry a negative count and must stay
      # negative (same rule as in getdepends), otherwise loading one of them
      # twice would make it show up in the install list.
      next if exists $pkgs{$pkg} && $pkgs{$pkg} < 0;

      $pkgs{$pkg}++;
    }
  }
  close($in);
  return;
}

sub getdepends {
  # Resolves the direct dependencies of one installed package.
  #
  # Argument: the package name.
  # Reads the "Depends:" and "Imports:" fields of the package's DESCRIPTION
  # file and records every package named there in the global %pkgs hash:
  # unknown packages are added with a count of 1, known ones with a positive
  # count are incremented, and packages installed by default (negative
  # count) are left untouched.
  # Returns nothing.  A package without a readable DESCRIPTION file is
  # silently skipped.
  my ($pkg) = @_;

  my $description = "C:\\Program Files\\R\\library\\$pkg\\DESCRIPTION";
  open(my $in, '<', $description) or return;

  # Collect the complete value of each dependency field.  A field may be
  # folded over several lines; continuation lines start with whitespace.
  my @fields;
  my $in_dep_field = 0;
  while (my $line = <$in>) {
    $line =~ s/[\r\n]+$//;
    if ($line =~ /^(?:Imports|Depends):\s*(.*)/) {
      push @fields, $1;
      $in_dep_field = 1;
    }
    elsif ($in_dep_field && $line =~ /^\s+(\S.*)/) {
      $fields[-1] .= " $1";
    }
    else {
      $in_dep_field = 0;
    }
  }
  close($in);

  for my $field (@fields) {
    for my $dep (split(/,/, $field)) {
      $dep =~ s/\(.*\)//;     # drop a version constraint such as "(>= 1.0)"
      $dep =~ s/\s//g;

      # Skip empty entries (e.g. after a trailing comma) and the dependency
      # on R itself.  The name is compared exactly so that packages whose
      # name merely ends in "R" are kept.
      next if $dep eq '' || $dep eq 'R';

      if (exists $pkgs{$dep}) {
        $pkgs{$dep}++ if $pkgs{$dep} > 0;   # don't add basic packages
      }
      else {
        $pkgs{$dep} = 1;
      }
    }
  }
  return;
}

# now the main loop. hope, it is self-describing

# Step 1: walk the current directory tree and count library(...) calls.
print "Searching for R commands...";
find({ wanted => \&wanted, no_chdir => 1 }, '.');
print "done!\n";

# Step 2: resolve dependencies.  getdepends() may add new packages to %pkgs,
# and those have dependencies of their own, so keep going until every
# package in the hash has been processed exactly once.
print "Now resolving dependencies...";
my %resolved;
while (my @todo = grep { !$resolved{$_} } sort keys %pkgs) {
  for my $p (@todo) {
    #print "$p\n";
    $resolved{$p} = 1;
    getdepends($p);
  }
}
print "done!\n";

# Step 3: write an R script that installs every package with a positive
# count.  install.packages() takes the package names as one character
# vector, so the names are quoted and wrapped in c(...).  They are sorted to
# make the output reproducible.
my $outfile = "install.pkgs.r";
open(my $out, '>', $outfile) or die("cannot create file $outfile: $!");

my @to_install = sort grep { $pkgs{$_} > 0 } keys %pkgs;

print $out "install.packages(c(\n";
print $out join(",\n", map { "                  \"$_\"" } @to_install), "\n"
  if @to_install;
# NOTE(review): "ask=FALSE" is kept from the original script; confirm that
# install.packages() in the target R version accepts it.
print $out "                 ),\n ask=FALSE)\n";

close($out) or die("cannot write file $outfile: $!");
</pre>



More information about the R-help mailing list