[Rd] chartr better
Prof Brian Ripley
ripley at stats.ox.ac.uk
Wed Sep 19 11:11:12 CEST 2007
Thank you for this.
Unfortunately qsort and bsearch are C99 functions, and I don't think we
can safely assume that they are present. So I am going to put this into
R-devel but not 2.6.0. When we have a bit more experience we can consider
backporting it to 2.6.1 (I don't want to change the autoconf stuff at this
stage).
Brian Ripley
On Thu, 13 Sep 2007, Ei-ji Nakama wrote:
> For example, the following changes are necessary when i convert a
> Japanese hiragana into katakana in chattr.
>
> R code:
>> chartr("\u3041-\u3093","\u30a1-\u30f3","\u3084\u3063\u305f\u30fc")
>
> --- R-alpha.orig/src/main/character.c 2007-09-05 07:13:27.000000000 +0900
> +++ R-alpha/src/main/character.c 2007-09-13 16:10:21.000000000 +0900
> @@ -2041,6 +2041,16 @@
> return(c);
> }
>
> +typedef struct { wchar_t c_old, c_new; } xtable_t;
> +static inline int xtable_comp(const xtable_t *a, const xtable_t *b)
> +{
> + return a->c_old - b->c_old;
> +}
> +static inline int xtable_key_comp(const wchar_t *a, const xtable_t *b)
> +{
> + return *a - b->c_old;
> +}
> +
> SEXP attribute_hidden do_chartr(SEXP call, SEXP op, SEXP args, SEXP env)
> {
> SEXP old, _new, x, y;
> @@ -2064,14 +2074,18 @@
> #ifdef SUPPORT_MBCS
> if(mbcslocale) {
> int j, nb, nc;
> - wchar_t xtable[65536 + 1], c_old, c_new, *wc;
> + xtable_t *xtable;
> + int xtable_cnt;
> + wchar_t c_old, c_new, *wc;
> const char *xi, *s;
> struct wtr_spec *trs_old, **trs_old_ptr;
> struct wtr_spec *trs_new, **trs_new_ptr;
> -
> - for(i = 0; i <= UCHAR_MAX; i++) xtable[i] = i;
> + struct wtr_spec *trs_cnt, **trs_cnt_ptr;
>
> /* Initialize the old and new wtr_spec lists. */
> + trs_cnt = Calloc(1, struct wtr_spec);
> + trs_cnt->type = WTR_INIT;
> + trs_cnt->next = NULL;
> trs_old = Calloc(1, struct wtr_spec);
> trs_old->type = WTR_INIT;
> trs_old->next = NULL;
> @@ -2084,6 +2098,7 @@
> if(nc < 0) error(_("invalid multibyte string 'old'"));
> wc = (wchar_t *) R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
> mbstowcs(wc, s, nc + 1);
> + wtr_build_spec(wc, trs_cnt); /* use count only */
> wtr_build_spec(wc, trs_old);
>
> s = translateChar(STRING_ELT(_new, 0));
> @@ -2096,38 +2111,54 @@
> /* Initialize the pointers for walking through the old and new
> wtr_spec lists and retrieving the next chars from the lists.
> */
> + trs_cnt_ptr = Calloc(1, struct wtr_spec *);
> + *trs_cnt_ptr = trs_cnt->next;
> + for( xtable_cnt = 0 ; wtr_get_next_char_from_spec(trs_cnt_ptr)
> ;xtable_cnt++ );
> + Free(trs_cnt_ptr);
> + xtable = (xtable_t *)R_alloc(xtable_cnt+1,sizeof(xtable_t));
> +
> trs_old_ptr = Calloc(1, struct wtr_spec *);
> *trs_old_ptr = trs_old->next;
> trs_new_ptr = Calloc(1, struct wtr_spec *);
> *trs_new_ptr = trs_new->next;
> - for(;;) {
> + for(i=0;;i++) {
> c_old = wtr_get_next_char_from_spec(trs_old_ptr);
> c_new = wtr_get_next_char_from_spec(trs_new_ptr);
> if(c_old == '\0')
> break;
> else if(c_new == '\0')
> error(_("'old' is longer than 'new'"));
> - else
> - xtable[c_old] = c_new;
> + else{
> + xtable[i].c_old=c_old;
> + xtable[i].c_new=c_new;
> + }
> }
> +
> /* Free the memory occupied by the wtr_spec lists. */
> wtr_free_spec(trs_old);
> wtr_free_spec(trs_new);
> Free(trs_old_ptr); Free(trs_new_ptr);
>
> + qsort(xtable, xtable_cnt, sizeof(xtable_t),
> + (int(*)(const void *, const void *))xtable_comp);
> +
> n = LENGTH(x);
> PROTECT(y = allocVector(STRSXP, n));
> for(i = 0; i < n; i++) {
> if (STRING_ELT(x,i) == NA_STRING)
> SET_STRING_ELT(y, i, NA_STRING);
> else {
> + xtable_t *tbl;
> xi = translateChar(STRING_ELT(x, i));
> nc = mbstowcs(NULL, xi, 0);
> if(nc < 0)
> error(_("invalid input multibyte string %d"), i+1);
> wc = (wchar_t *)
> R_AllocStringBuffer((nc+1)*sizeof(wchar_t), &cbuff);
> mbstowcs(wc, xi, nc + 1);
> - for(j = 0; j < nc; j++) wc[j] = xtable[wc[j]];
> + for(j = 0; j < nc; j++)
> + if (tbl = bsearch(&wc[j], xtable, xtable_cnt,
> sizeof(xtable_t),
> + (int(*)(const void *, const
> void *))xtable_key_comp))
> + wc[j]=tbl->c_new;
> nb = wcstombs(NULL, wc, 0);
> cbuf = CallocCharBuf(nb);
> wcstombs(cbuf, wc, nb + 1);
>
>
--
Brian D. Ripley, ripley at stats.ox.ac.uk
Professor of Applied Statistics, http://www.stats.ox.ac.uk/~ripley/
University of Oxford, Tel: +44 1865 272861 (self)
1 South Parks Road, +44 1865 272866 (PA)
Oxford OX1 3TG, UK Fax: +44 1865 272595
More information about the R-devel
mailing list