#### iconv() behavior depending on the LC_CTYPE locale in Windows
#### ============================================================
###
### In a *shell* in Windows (emacs), after doing R.home() in R, use that
### to do something like
###   c:/PROGRA~1/R/R-devel/bin/R CMD BATCH iconv-Windows.R
###   ==> producing iconv-Windows.Rout
###
### The statements are deliberately kept flat at top level (no loops or
### helper functions) so that R CMD BATCH echoes each call and auto-prints
### its result into the .Rout file.

sessionInfo() # does not matter so much
## -- should be Windows to exhibit the problems

## Remember the LC_CTYPE in effect before the script changes it, so it can
## be put back at the end (matters when the file is source()d interactively).
old_ctype <- Sys.getlocale("LC_CTYPE")

## From help(iconv)'s example: using "latin1" European language letters.
x1 <- c("Ekstr\xf8m", "J\xf6reskog", "bi\xdfchen Z\xfcrcher")
Encoding(x1) <- "latin1"
## The same three strings, converted to UTF-8.
xU <- iconv(x1, "latin1", "UTF-8")

## 2 locales that do not work well : ---------------------------------
## (trailing comments are the original author's observations on Windows)

Sys.setlocale("LC_CTYPE", "Chinese")
iconv(x1, "latin1", "") # NA NA NA
iconv(x1, "latin1", "//TRANSLIT") # perfect for Chinese
iconv(x1, "latin1", "", sub = "byte")
iconv(xU, "UTF-8", "") # NA NA NA
iconv(xU, "UTF-8", "//TRANSLIT")
iconv(xU, "UTF-8", "", sub = "byte")

##--
Sys.setlocale("LC_CTYPE", "Arabic")
iconv(x1, "latin1", "") # NA NA NA
iconv(x1, "latin1", "//TRANSLIT") # not bad, but not perfect
iconv(x1, "latin1", "", sub = "byte")
iconv(x1, "latin1", "", sub = "?")
iconv(xU, "UTF-8", "") # NA NA NA
iconv(xU, "UTF-8", "//TRANSLIT")
iconv(xU, "UTF-8", "", sub = "byte")
iconv(xU, "UTF-8", "", sub = "?")

## 2 locales that work well for these examples (no wonder) -----------

Sys.setlocale("LC_CTYPE", "German_Switzerland")
iconv(x1, "latin1", "")
iconv(x1, "latin1", "//TRANSLIT")
iconv(x1, "latin1", "", sub = "?")
iconv(xU, "UTF-8", "")
iconv(xU, "UTF-8", "//TRANSLIT")
iconv(xU, "UTF-8", "", sub = "?")

##--
Sys.setlocale("LC_CTYPE", "English")
iconv(x1, "latin1", "")
iconv(x1, "latin1", "//TRANSLIT")
iconv(x1, "latin1", "", sub = "?")
iconv(xU, "UTF-8", "")
iconv(xU, "UTF-8", "//TRANSLIT")
iconv(xU, "UTF-8", "", sub = "?")

## Restore the LC_CTYPE that was active when the script started.
Sys.setlocale("LC_CTYPE", old_ctype)