[Rd] data is getting corrupted
Jeff D. Hamann
jeff.hamann at forestinformatics.com
Mon Nov 29 18:27:42 CET 2004
I've been attempting to perform some analysis on a model that was
interfaced with R (R calls a library that takes SEXPs and converts the
data frames into the internal structures of data), and I notice that for
small data.frames the vectors don't get corrupt (n<200-ish). When I pass
in larger data.frames, the vectors will become corrupt. I've been
PROTECTING the heck out of everything (as best as I can from the examples)
to make sure that something is not overlooked. I know the code in my
library works fine becuase when I attempt to do the same thing (with much
larger data arrays) none of this behaviour occurs.
An example of the corruption is,
1 1714 ARPA 0.00 0.0000 0.00 0.0000 3.64 0.000 1
20.00 0.00 0.00 0
1 1715 ARPA 0.00 0.0000 0.00 0.0000 3.14 0.000 1
20.00 0.00 0.00 0
1 1716 ARPA
97538806975312948000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00
0.0000 0.00 0.0000 4.68 0.000 1 20.00 0.00 0.00
0
1 1717 ARPA 0.00 0.0000 0.00 0.0000 2.50 0.000 1
20.00 0.00 0.00 0
1 1718 ARPA 0.00 0.0000 0.00 0.0000 4.78 0.000 1
20.00 0.00 0.00 0
1 1719 ARPA 0.00 0.0000 0.00 0.0000 4.04 0.000 1
20.00 0.00 0.00 0
1 1720 ARPA 0.00 0.0000 0.00 0.0000 2.60 0.000 1
20.00 0.00 0.00 0
1 1721 ARPA
1141566538356936100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00
0.0000 0.00 0.0000 3.57 0.000 1 20.00 0.00 0.00
0
1 1722 ARPA 0.00 0.0000 0.00 0.0000 2.29 0.000 1
20.00 0.00 0.00 0
And it's the same columns that become corrupt. I wanted to make sure I'm
using PROTECT correctly (the examples in the docs don't appear very
thourough) and included a snippet below,
SEXP r_write_sample_to_file( SEXP sample_in,
SEXP filename )
{
unsigned long return_code;
struct SAMPLE_RECORD *sample_ptr;
SEXP ans;
PROTECT( filename = AS_CHARACTER( filename ) );
PROTECT(ans = allocVector(INTSXP, 1));
PROTECT( sample_in = AS_LIST( sample_in ) );
sample_ptr = build_sample_from_sexp( sample_in );
write_sample_to_file(
&return_code,
CHAR(STRING_ELT(filename, 0)),
sample_ptr,
N_SPECIES,
SPECIES_PTR );
if( return_code != CONIFERS_SUCCESS )
{
Rprintf( "unable to write %s\n", CHAR(STRING_ELT(filename, 0)) );
INTEGER(ans)[0] = -1;
UNPROTECT(2);
UNPROTECT( 1 );
return ans;
}
INTEGER(ans)[0] = 0;
UNPROTECT(2);
UNPROTECT( 1 );
return ans;
}
which calls this rather lengthy function (but I thought I should include
the entire function for completeness),
/* this function converts the sample list */
/* from R into the internal structure */
struct SAMPLE_RECORD *build_sample_from_sexp( SEXP sample )
{
int i;
/* plots variables */
SEXP plot_list;
SEXP plot_plot_sexp;
SEXP plot_lat_sexp;
SEXP plot_long_sexp;
SEXP plot_elev_sexp;
SEXP plot_slp_sexp;
SEXP plot_asp_sexp;
SEXP plot_h20_sexp;
SEXP plot_map_sexp;
/* plants variables */
SEXP plant_list;
SEXP plant_plot_sexp;
SEXP plant_plant_sexp;
SEXP plant_sp_code_sexp;
SEXP plant_d6_sexp;
SEXP plant_d6_area_sexp;
SEXP plant_dbh_sexp;
SEXP plant_basal_area_sexp;
SEXP plant_tht_sexp;
SEXP plant_cr_sexp;
SEXP plant_n_stems_sexp;
SEXP plant_expf_sexp;
SEXP plant_crown_width_sexp;
SEXP plant_crown_area_sexp;
SEXP plant_user_code_sexp;
char temp_sp_code[16];
struct SAMPLE_RECORD *s_ptr;
struct SPECIES_RECORD *sp_ptr;
s_ptr = (struct SAMPLE_RECORD *)calloc( 1, sizeof( struct SAMPLE_RECORD
) );
/* s_ptr = (struct SAMPLE_RECORD *)Calloc( 1, struct SAMPLE_RECORD ); */
/* *fill in the header info */
strcpy( s_ptr->forest,
CHAR(STRING_ELT(get_list_element(sample,"forest"), 0)) ) ;
strcpy( s_ptr->subunit,
CHAR(STRING_ELT(get_list_element(sample,"subunit"), 0)) );
strcpy( s_ptr->stand_name,
CHAR(STRING_ELT(get_list_element(sample,"stand.name"), 0)) );
strcpy( s_ptr->legal, CHAR(STRING_ELT(get_list_element(sample,"legal"),
0)) );
s_ptr->elevation = asInteger( get_list_element( sample, "elevation" ) );
s_ptr->acreage = asReal( get_list_element( sample, "acreage" ) );
s_ptr->age = asInteger( get_list_element( sample, "age" ) );
s_ptr->sampled_month = asInteger( get_list_element( sample,
"sampled.month" ) );
s_ptr->sampled_day = asInteger( get_list_element( sample, "sampled.day"
) );
s_ptr->sampled_year = asInteger( get_list_element( sample,
"sampled.year" ) );
s_ptr->current_year = asInteger( get_list_element( sample,
"current.year" ) );
s_ptr->x0 = asReal( get_list_element( sample, "x0" ) );
if( s_ptr->age <= 0 )
{
s_ptr->age = 0;
}
if( s_ptr->sampled_month <= 0 )
{
s_ptr->sampled_month = 0;
}
/* build the plots vector */
s_ptr->n_points = asInteger( get_list_element( sample, "n.points" ) );
s_ptr->plots_ptr = (struct PLOT_RECORD*)calloc(
s_ptr->n_points, sizeof( struct PLOT_RECORD ) );
/* s_ptr->plots_ptr = (struct PLOT_RECORD*)Calloc( */
/* s_ptr->n_points, struct PLOT_RECORD ); */
plot_list = get_list_element( sample, "plots" );
PROTECT( plot_list = AS_LIST( plot_list ) );
// PROTECT( plot_list );
plot_plot_sexp = get_list_element( plot_list, "plot" );
plot_lat_sexp = get_list_element( plot_list, "latitude" );
plot_long_sexp = get_list_element( plot_list, "longitude" );
plot_elev_sexp = get_list_element( plot_list, "elevation" );
plot_slp_sexp = get_list_element( plot_list, "slope" );
plot_asp_sexp = get_list_element( plot_list, "aspect" );
plot_h20_sexp = get_list_element( plot_list, "whc" );
plot_map_sexp = get_list_element( plot_list, "map" );
PROTECT( plot_plot_sexp = coerceVector( plot_plot_sexp, INTSXP ) );
PROTECT( plot_lat_sexp = coerceVector( plot_lat_sexp, REALSXP ) );
PROTECT( plot_long_sexp = coerceVector( plot_long_sexp, REALSXP ) );
PROTECT( plot_elev_sexp = coerceVector( plot_elev_sexp, REALSXP ) );
PROTECT( plot_slp_sexp = coerceVector( plot_slp_sexp, REALSXP ) );
PROTECT( plot_asp_sexp = coerceVector( plot_asp_sexp, REALSXP ) );
PROTECT( plot_h20_sexp = coerceVector( plot_h20_sexp, REALSXP ) );
PROTECT( plot_map_sexp = coerceVector( plot_map_sexp, REALSXP ) );
/* assign the plot array */
for( i = 0; i < s_ptr->n_points; i++ )
{
s_ptr->plots_ptr[i].plot = INTEGER( plot_plot_sexp )[i];
s_ptr->plots_ptr[i].latitude = REAL( plot_lat_sexp )[i];
s_ptr->plots_ptr[i].longitude = REAL( plot_long_sexp )[i];
s_ptr->plots_ptr[i].elevation = REAL( plot_elev_sexp )[i];
s_ptr->plots_ptr[i].slope = REAL( plot_slp_sexp )[i];
s_ptr->plots_ptr[i].aspect = REAL( plot_asp_sexp )[i];
s_ptr->plots_ptr[i].water_capacity = REAL( plot_h20_sexp )[i];
s_ptr->plots_ptr[i].mean_annual_precip = REAL( plot_map_sexp )[i];
}
// UNPROTECT( 8 );
/* build the plants vector */
s_ptr->n_plants = asInteger( get_list_element( sample, "n.plants" ) );
s_ptr->plants_ptr = (struct PLANT_RECORD*)calloc(
s_ptr->n_plants, sizeof( struct PLANT_RECORD ) );
/* s_ptr->plants_ptr = (struct PLANT_RECORD*)Calloc( */
/* s_ptr->n_plants, struct PLANT_RECORD ); */
/* build the plots vector */
plant_list = get_list_element( sample, "plants" );
PROTECT( plant_list = AS_LIST( plant_list ) );
// PROTECT( plant_list );
plant_plot_sexp = get_list_element( plant_list, "plot" );
plant_plant_sexp = get_list_element( plant_list, "plant" );
plant_sp_code_sexp = get_list_element( plant_list, "sp.code" );
plant_d6_sexp = get_list_element( plant_list, "d6" );
plant_d6_area_sexp = get_list_element( plant_list, "d6.area" );
plant_dbh_sexp = get_list_element( plant_list, "dbh" );
plant_basal_area_sexp = get_list_element( plant_list, "basal.area" );
plant_tht_sexp = get_list_element( plant_list, "tht" );
plant_cr_sexp = get_list_element( plant_list, "cr" );
plant_n_stems_sexp = get_list_element( plant_list, "n.stems" );
plant_expf_sexp = get_list_element( plant_list, "expf" );
plant_crown_width_sexp = get_list_element( plant_list, "crown.width" );
plant_crown_area_sexp = get_list_element( plant_list, "crown.area" );
plant_user_code_sexp = get_list_element( plant_list, "user.code" );
/* read the plants */
PROTECT( plant_plot_sexp = coerceVector( plant_plot_sexp, INTSXP ) );
PROTECT( plant_plant_sexp = coerceVector( plant_plant_sexp, INTSXP ) );
PROTECT( plant_sp_code_sexp = coerceVector( plant_sp_code_sexp, STRSXP
) );
PROTECT( plant_d6_sexp = coerceVector( plant_d6_sexp, REALSXP ) );
PROTECT( plant_d6_area_sexp = coerceVector( plant_d6_area_sexp, REALSXP
) );
PROTECT( plant_dbh_sexp = coerceVector( plant_dbh_sexp, REALSXP ) );
PROTECT( plant_basal_area_sexp = coerceVector( plant_basal_area_sexp,
REALSXP ) );
PROTECT( plant_tht_sexp = coerceVector( plant_tht_sexp, REALSXP ) );
PROTECT( plant_cr_sexp = coerceVector( plant_cr_sexp, REALSXP ) );
PROTECT( plant_n_stems_sexp = coerceVector( plant_n_stems_sexp, INTSXP
) );
PROTECT( plant_expf_sexp = coerceVector( plant_expf_sexp, REALSXP ) );
PROTECT( plant_crown_width_sexp = coerceVector( plant_crown_width_sexp,
REALSXP ) );
PROTECT( plant_crown_area_sexp = coerceVector( plant_crown_area_sexp,
REALSXP ) );
PROTECT( plant_user_code_sexp = coerceVector( plant_user_code_sexp,
INTSXP ) );
/* sort the species codes based on sp_code */
qsort( (void*)SPECIES_PTR,
(size_t)(N_SPECIES),
sizeof( struct SPECIES_RECORD ),
compare_species_by_sp_code );
/* assign the plot array */
for( i = 0; i < s_ptr->n_plants; i++ )
{
s_ptr->plants_ptr[i].plot = INTEGER( plant_plot_sexp )[i];
s_ptr->plants_ptr[i].plant = INTEGER( plant_plant_sexp )[i];
strcpy( temp_sp_code, CHAR( STRING_ELT( plant_sp_code_sexp, i ) ) );
/* get the species code and look up the correct index */
sp_ptr = get_species_entry_from_code( N_SPECIES,
SPECIES_PTR,
temp_sp_code );
if( !sp_ptr )
{
Rprintf( "couldn't find the species code for %s, %s\n",
temp_sp_code, CHAR( STRING_ELT( plant_sp_code_sexp, i ) ) );
continue;
}
/* this is the index of the "unsorted" array */
s_ptr->plants_ptr[i].sp_idx = sp_ptr->idx;
s_ptr->plants_ptr[i].d6 = REAL( plant_d6_sexp )[i];
s_ptr->plants_ptr[i].d6_area = REAL( plant_d6_area_sexp )[i];
s_ptr->plants_ptr[i].dbh = REAL( plant_dbh_sexp )[i];
s_ptr->plants_ptr[i].basal_area = REAL( plant_basal_area_sexp )[i];
s_ptr->plants_ptr[i].tht = REAL( plant_tht_sexp )[i];
s_ptr->plants_ptr[i].cr = REAL( plant_cr_sexp )[i];
s_ptr->plants_ptr[i].n_stems = INTEGER( plant_n_stems_sexp )[i];
s_ptr->plants_ptr[i].expf = REAL( plant_expf_sexp )[i];
s_ptr->plants_ptr[i].crown_width = REAL( plant_crown_width_sexp )[i];
s_ptr->plants_ptr[i].crown_area = REAL( plant_crown_area_sexp )[i];
s_ptr->plants_ptr[i].user_code = INTEGER( plant_user_code_sexp )[i];
/* Rprintf( "dbh = %lf\n", s_ptr->plants_ptr[i].dbh ); */
/* perform some basic error checking here */
/* see if you can use the ISNAN macro here */
/* try the isnan macro */
/* if( ISNAN( REAL( plant_d6_sexp )[i] ) || s_ptr->plants_ptr[i].d6
< 0.0 ) */
/* { */
/* s_ptr->plants_ptr[i].d6 = 0.0; */
/* } */
if( ISNA( REAL( plant_d6_sexp )[i] ) ||
ISNAN( REAL( plant_d6_sexp )[i] ) ||
s_ptr->plants_ptr[i].d6 < 0.0 )
{
s_ptr->plants_ptr[i].d6 = 0.0;
}
if( ISNA( REAL( plant_dbh_sexp )[i] ) ||
ISNAN( REAL( plant_dbh_sexp )[i] ) ||
s_ptr->plants_ptr[i].dbh < 0.0 )
{
s_ptr->plants_ptr[i].dbh = 0.0;
}
if( ISNAN( REAL( plant_tht_sexp )[i] ) || s_ptr->plants_ptr[i].expf
< 0.0 )
{
s_ptr->plants_ptr[i].tht = 0.0;
}
if( ISNAN( REAL( plant_cr_sexp )[i] ) || s_ptr->plants_ptr[i].cr <
0.0 )
{
s_ptr->plants_ptr[i].cr = 0.0;
}
if( ISNAN( REAL( plant_expf_sexp )[i] ) ||
s_ptr->plants_ptr[i].expf < 0.0 )
{
s_ptr->plants_ptr[i].expf = 0.0;
}
if( ISNAN( REAL( plant_crown_width_sexp )[i] ) ||
s_ptr->plants_ptr[i].crown_width < 0.0 )
{
s_ptr->plants_ptr[i].crown_width = 0.0;
}
if( ISNAN( REAL( plant_crown_area_sexp )[i] ) ||
s_ptr->plants_ptr[i].crown_area < 0.0 )
{
s_ptr->plants_ptr[i].crown_area = 0.0;
}
/* s_ptr->plants_ptr[i].crown_area = REAL( plant_crown_area_sexp
)[i]; */
}
/* now sort the species back to the "native" order (by index) */
qsort( (void*)SPECIES_PTR,
(size_t)(N_SPECIES),
sizeof( struct SPECIES_RECORD ),
compare_species_by_idx );
UNPROTECT( 8 ); /* plot lists */
UNPROTECT( 14 );
UNPROTECT( 1 ); /* plot_list */
UNPROTECT( 1 ); /* plant list */
return s_ptr;
}
I appolgise for the long email, but I'd rather appolgize than ask permission.
I'm sure there's something I don't understand about the PROTECT/UNPROTECT
sequence as this seems to work on smaller data.frames
Thanks,
Jeff.
--
Jeff D. Hamann
Forest Informatics, Inc.
PO Box 1421
Corvallis, Oregon 97339-1421
phone 541-754-1428
fax 541-752-0288
jeff.hamann at forestinformatics.com
http://www.forestinformatics.com
More information about the R-devel
mailing list