[Rd] spss long labels

Kurt Van Dijck kurt.van.dijck at skynet.be
Wed Jul 9 10:05:39 CEST 2008


Hi all,

I got no feedback at all concerning the merge of this patch in the source tree.
Am I supposed to do this myself? How should I do this (do I have subversion commit
access)? Is this patch acceptable at all? Is it being tested?

I got some personal reactions on my post, proving there is general interest in
getting rid of the inconvenience of importing long labels from SPSS files.



Kurt Van Dijck wrote:
> Hi,
> 
> A frequently seen issue with importing SPSS data files, is that R does
> not import the 'long variable names'.
> I built a patch on the R-project's foreign module, in order to import
> the 'long variable names' from SPSS (record 7, subtype 13).
> To complete the job, I had to expand the "struct variable" definition
> to have 64 +1 charachters. I'm not aware of side effects.
> The sfm-read.c code works fine.
> I didn't test a variety of platforms, as I don't have an idea of what is
> regarded as sufficient testing. Anyway, I don't expect major troubles there
> (no byteswapping problems, no 32<->64 bit issues) as it's mainly 
> character processing.
> The patch is relative to the foreign directory. It was created
> against the trunk of R-project yesterday.
> 
> We would appreciate that you import such patch into the main tree.
> 
> Kind regards,
> 
> Kurt Van Dijck (C programmer) & Ilse Laurijssen (R user)
> Belgium
> 
> Index: src/sfm-read.c
> ===================================================================
> --- src/sfm-read.c    (revision 5168)
> +++ src/sfm-read.c    (working copy)
> @@ -188,6 +188,8 @@
>  static int read_variables (struct file_handle * h, struct variable *** 
> var_by_index);
>  static int read_machine_int32_info (struct file_handle * h, int size, 
> int count, int *encoding);
>  static int read_machine_flt64_info (struct file_handle * h, int size, 
> int count);
> +static int read_long_var_names (struct file_handle * h, struct 
> dictionary *
> +        , unsigned long size, unsigned int count);
>  static int read_documents (struct file_handle * h);
> 
>  /* Displays the message X with corrupt_msg, then jumps to the lossage
> @@ -418,11 +420,15 @@
>          break;
> 
>            case 7: /* Multiple-response sets (later versions of SPSS). */
> -          case 13:  /* long variable names. PSPP now has code for these
> -               that could be ported if someone is interested. */
>          skip = 1;
>          break;
> 
> +          case 13:    /* long variable names. PSPP now has code for these
> +               that could be ported if someone is interested. */
> +        if (!read_long_var_names(h, ext->dict, data.size, data.count))
> +          goto lossage;
> +        break;
> +
>            case 16: /* See 
> http://www.nabble.com/problem-loading-SPSS-15.0-save-files-t2726500.html */
>          skip = 1;
>          break;
> @@ -584,14 +590,72 @@
>    return 0;
>  }
> 
> +/* Read record type 7, subtype 13.
> + * long variable names
> + */
>  static int
> +read_long_var_names (struct file_handle * h, struct dictionary * dict
> +        , unsigned long size, unsigned int count)
> +{
> +  char * data;
> +  unsigned int j;
> +  struct variable ** lp;
> +  struct variable ** end;
> +  char * p;
> +  char * endp;
> +  char * val;
> +  if ((1 != size)||(0 == count)) {
> +    warning("%s: strange record info seen, size=%u, count=%u"
> +      ", ignoring long variable names"
> +      , h->fn, size, count);
> +    return 0;
> +  }
> +  size *= count;
> +  data = Calloc (size +1, char);
> +  bufread(h, data, size, 0);
> +  /* parse */
> +  end = &dict->var[dict->nvar];
> +  p = data;
> +  do {
> +    if (0 != (endp = strchr(p, '\t')))
> +      *endp = 0; /* put null terminator */
> +    if (0 == (val = strchr(p, '='))) {
> +      warning("%s: no long variable name for variable '%s'", h->fn, p);
> +    } else {
> +      *val = 0;
> +      ++val;
> +      /* now, p is key, val is long name */
> +      for (lp = dict->var; lp < end; ++lp) {
> +        if (!strcmp(lp[0]->name, p)) {
> +          strncpy(lp[0]->name, val, sizeof(lp[0]->name));
> +          break;
> +        }
> +      }
> +      if (lp >= end) {
> +        warning("%s: long variable name mapping '%s' to '%s'"
> +        "for variable which does not exist"
> +        , h->fn, p, val);
> +      }
> +    }
> +    p = &endp[1]; /* put to next */
> +  } while (endp);
> +
> +  free(data);
> +  return 1;
> +
> +lossage:
> +  free(data);
> +  return 0;
> +}
> +
> +static int
>  read_header (struct file_handle * h, struct sfm_read_info * inf)
>  {
>    struct sfm_fhuser_ext *ext = h->ext;    /* File extension strcut. */
>    struct sysfile_header hdr;        /* Disk buffer. */
>    struct dictionary *dict;        /* File dictionary. */
>    char prod_name[sizeof hdr.prod_name + 1];    /* Buffer for product 
> name. */
> -  int skip_amt = 0;                /* Amount of product name to omit. */
> +  int skip_amt = 0;            /* Amount of product name to omit. */
>    int i;
> 
>    /* Create the dictionary. */
> @@ -1495,7 +1559,7 @@
>  /* Reads one case from system file H into the value array PERM
>     according to the instructions given in associated dictionary DICT,
>     which must have the get.* elements appropriately set.  Returns
> -   nonzero only if successful.  */
> +   nonzero only if successful.    */
>  int
>  sfm_read_case (struct file_handle * h, union value * perm, struct 
> dictionary * dict)
>  {
> Index: src/var.h.in
> ===================================================================
> --- src/var.h.in    (revision 5168)
> +++ src/var.h.in    (working copy)
> @@ -41,6 +41,10 @@
>  #error MAX_SHORT_STRING must be less than 8.
>  #endif
> 
> +/* VAR_NAME_LEN: the length of a variable.
> + * SPSS supports names of 64 long
> + */
> +#define VAR_NAME_LEN 64
>  /* Special values. */
>  #define SYSMIS (-DBL_MAX)
>  #define LOWEST second_lowest_double_val()
> @@ -228,7 +232,7 @@
>  /* MODIFY VARS private data. */
>  struct modify_vars_proc
>    {
> -    char new_name[9];        /* Variable's new name. */
> +    char new_name[VAR_NAME_LEN +1];        /* Variable's new name. */
>      int drop_this_var;        /* 0=keep this var, 1=drop this var. */
>      struct variable *next;    /* Next in linked list. */
>    };
> @@ -302,7 +306,7 @@
>  struct variable
>    {
>      /* Required by parse_variables() to be in this order.  */
> -    char name[9];        /* As a string. */
> +    char name[VAR_NAME_LEN +1];    /* As a string. */
>      int index;            /* Index into its dictionary's var[]. */
>      int type;            /* NUMERIC or ALPHA. */
>      int foo;            /* Used for temporary storage. */
> @@ -373,9 +377,9 @@
> 
>      int weight_index;        /* `value' index of $WEIGHT, or -1 if none.
>                     Call update_weighting() before using! */
> -    char weight_var[9];        /* Name of WEIGHT variable. */
> +    char weight_var[VAR_NAME_LEN];/* Name of WEIGHT variable. */
> 
> -    char filter_var[9];        /* Name of FILTER variable. */
> +    char filter_var[VAR_NAME_LEN];/* Name of FILTER variable. */
>      /* Do not make another field the last field! or see
>         temporary.c:restore_dictionary() before doing so! */
>    };
> 
> ______________________________________________
> R-devel at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-devel



More information about the R-devel mailing list