[Rd] [R] HTTP User-Agent header
Seth Falcon
sfalcon at fhcrc.org
Sun Jul 30 16:39:04 CEST 2006
Robert Gentleman <rgentlem at fhcrc.org> writes:
> OK, that suggests setting at the options level would solve both of your
> problems and that seems like the best approach. I don't really want to
> pass this around as a parameter through the maze of functions that might
> actually download something if we don't have to.
I have an updated patch that adds an HTTPUserAgent option. The
default is a string like:
R (2.4.0 x86_64-unknown-linux-gnu x86_64 linux-gnu)
If the HTTPUserAgent option is NULL, no user agent header is added to
HTTP requests (this is the current behavior). This option allows R to
use an arbitrary user agent header.
The patch adds two non-exported functions to utils:
1) defaultUserAgent - returns a string like above
2) makeUserAgent - formats content of HTTPUserAgent option for use
as part of an HTTP request header.
I've tested on OSX and Linux, but not on Windows. When USE_WININET is
defined, a user agent string of "R" was already being used. With this
patch, the HTTPUserAgent options is used. I'm unsure if NULL is
allowed.
Also, in src/main/internet.c there is a comment:
"Next 6 are for use by libxml, only"
and then a definition for R_HTTPOpen. Not sure how/when these get
used. The user agent for these calls remains unspecified with this
patch.
+ seth
Patch summary:
src/include/R_ext/R-ftp-http.h | 2 +-
src/include/Rmodules/Rinternet.h | 2 +-
src/library/base/man/options.Rd | 5 +++++
src/library/utils/R/readhttp.R | 25 +++++++++++++++++++++++++
src/library/utils/R/zzz.R | 3 ++-
src/main/internet.c | 2 +-
src/modules/internet/internet.c | 37 +++++++++++++++++++++++++------------
src/modules/internet/nanohttp.c | 8 ++++++--
8 files changed, 66 insertions(+), 18 deletions(-)
Index: src/include/R_ext/R-ftp-http.h
===================================================================
--- src/include/R_ext/R-ftp-http.h (revision 38715)
+++ src/include/R_ext/R-ftp-http.h (working copy)
@@ -36,7 +36,7 @@
int R_FTPRead(void *ctx, char *dest, int len);
void R_FTPClose(void *ctx);
-void * RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK);
+void * RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers, int cacheOK);
int RxmlNanoHTTPRead(void *ctx, void *dest, int len);
void RxmlNanoHTTPClose(void *ctx);
int RxmlNanoHTTPReturnCode(void *ctx);
Index: src/include/Rmodules/Rinternet.h
===================================================================
--- src/include/Rmodules/Rinternet.h (revision 38715)
+++ src/include/Rmodules/Rinternet.h (working copy)
@@ -9,7 +9,7 @@
typedef Rconnection (*R_NewUrlRoutine)(char *description, char *mode);
typedef Rconnection (*R_NewSockRoutine)(char *host, int port, int server, char *mode);
-typedef void * (*R_HTTPOpenRoutine)(const char *url, const int cacheOK);
+typedef void * (*R_HTTPOpenRoutine)(const char *url, const char *headers, const int cacheOK);
typedef int (*R_HTTPReadRoutine)(void *ctx, char *dest, int len);
typedef void (*R_HTTPCloseRoutine)(void *ctx);
Index: src/main/internet.c
===================================================================
--- src/main/internet.c (revision 38715)
+++ src/main/internet.c (working copy)
@@ -129,7 +129,7 @@
{
if(!initialized) internet_Init();
if(initialized > 0)
- return (*ptr->HTTPOpen)(url, 0);
+ return (*ptr->HTTPOpen)(url, NULL, 0);
else {
error(_("internet routines cannot be loaded"));
return NULL;
Index: src/library/utils/R/zzz.R
===================================================================
--- src/library/utils/R/zzz.R (revision 38715)
+++ src/library/utils/R/zzz.R (working copy)
@@ -9,7 +9,8 @@
internet.info = 2,
pkgType = .Platform$pkgType,
str = list(strict.width = "no"),
- example.ask = "default")
+ example.ask = "default",
+ HTTPUserAgent = defaultUserAgent())
extra <-
if(.Platform$OS.type == "windows") {
list(mailer = "none",
Index: src/library/utils/R/readhttp.R
===================================================================
--- src/library/utils/R/readhttp.R (revision 38715)
+++ src/library/utils/R/readhttp.R (working copy)
@@ -6,3 +6,28 @@
stop("transfer failure")
file.show(file, delete.file = delete.file, title = title, ...)
}
+
+
+
+defaultUserAgent <- function()
+{
+ Rver <- paste(R.version$major, R.version$minor, sep=".")
+ Rdetails <- paste(Rver, R.version$platform, R.version$arch,
+ R.version$os)
+ paste("R (", Rdetails, ")", sep="")
+}
+
+
+makeUserAgent <- function(format = TRUE) {
+ agent <- getOption("HTTPUserAgent")
+ if (is.null(agent)) {
+ return(NULL)
+ }
+ if (length(agent) != 1)
+ stop(sQuote("HTTPUserAgent"),
+ " option must be a length one character vector or NULL")
+ if (format)
+ paste("User-Agent: ", agent[1], "\r\n", sep = "")
+ else
+ agent[1]
+}
Index: src/library/base/man/options.Rd
===================================================================
--- src/library/base/man/options.Rd (revision 38715)
+++ src/library/base/man/options.Rd (working copy)
@@ -368,6 +368,11 @@
\item{\code{help.try.all.packages}:}{default for an argument of
\code{\link{help}}.}
+ \item{\code{HTTPUserAgent}:}{string used as the user agent in HTTP
+ requests. If \code{NULL}, HTTP requests will be made without a
+ user agent header. The default is \code{R (<version> <platform>
+ <arch> <os>)}}
+
\item{\code{internet.info}:}{The minimum level of information to be
printed on URL downloads etc. Default is 2, for failure causes.
Set to 1 or 0 to get more information.}
Index: src/modules/internet/internet.c
===================================================================
--- src/modules/internet/internet.c (revision 38715)
+++ src/modules/internet/internet.c (working copy)
@@ -28,7 +28,7 @@
#include <Rconnections.h>
#include <R_ext/R-ftp-http.h>
-static void *in_R_HTTPOpen(const char *url, const int cacheOK);
+static void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK);
static int in_R_HTTPRead(void *ctx, char *dest, int len);
static void in_R_HTTPClose(void *ctx);
@@ -70,7 +70,7 @@
switch(type) {
case HTTPsh:
- ctxt = in_R_HTTPOpen(url, 0);
+ ctxt = in_R_HTTPOpen(url, NULL, 0);
if(ctxt == NULL) {
/* if we call error() we get a connection leak*/
/* so do_url has to raise the error*/
@@ -238,14 +238,14 @@
}
#endif
-/* download(url, destfile, quiet, mode, cacheOK) */
+/* download(url, destfile, quiet, mode, headers, cacheOK) */
#define CPBUFSIZE 65536
#define IBUFSIZE 4096
static SEXP in_do_download(SEXP call, SEXP op, SEXP args, SEXP env)
{
- SEXP ans, scmd, sfile, smode;
- char *url, *file, *mode;
+ SEXP ans, scmd, sfile, smode, sheaders, agentFun;
+ char *url, *file, *mode, *headers;
int quiet, status = 0, cacheOK;
checkArity(op, args);
@@ -271,6 +271,17 @@
cacheOK = asLogical(CAR(args));
if(cacheOK == NA_LOGICAL)
error(_("invalid '%s' argument"), "cacheOK");
+#ifdef USE_WININET
+ PROTECT(agentFun = lang2(install("makeUserAgent"), ScalarLogical(0)));
+#else
+ PROTECT(agentFun = lang1(install("makeUserAgent")));
+#endif
+ PROTECT(sheaders = eval(agentFun, R_FindNamespace(mkString("utils"))));
+ UNPROTECT(1);
+ if(TYPEOF(sheaders) == NILSXP)
+ headers = NULL;
+ else
+ headers = CHAR(STRING_ELT(sheaders, 0));
#ifdef Win32
if (!pbar.wprog) {
pbar.wprog = newwindow(_("Download progress"), rect(0, 0, 540, 100),
@@ -319,7 +330,7 @@
#ifdef Win32
R_FlushConsole();
#endif
- ctxt = in_R_HTTPOpen(url, cacheOK);
+ ctxt = in_R_HTTPOpen(url, headers, cacheOK);
if(ctxt == NULL) status = 1;
else {
if(!quiet) REprintf(_("opened URL\n"), url);
@@ -466,14 +477,14 @@
PROTECT(ans = allocVector(INTSXP, 1));
INTEGER(ans)[0] = status;
- UNPROTECT(1);
+ UNPROTECT(2);
return ans;
}
#if defined(SUPPORT_LIBXML) && !defined(USE_WININET)
-void *in_R_HTTPOpen(const char *url, int cacheOK)
+void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK)
{
inetconn *con;
void *ctxt;
@@ -484,7 +495,7 @@
if(timeout == NA_INTEGER || timeout <= 0) timeout = 60;
RxmlNanoHTTPTimeout(timeout);
- ctxt = RxmlNanoHTTPOpen(url, NULL, cacheOK);
+ ctxt = RxmlNanoHTTPOpen(url, NULL, headers, cacheOK);
if(ctxt != NULL) {
int rc = RxmlNanoHTTPReturnCode(ctxt);
if(rc != 200) {
@@ -605,7 +616,8 @@
}
#endif /* USE_WININET_ASYNC */
-static void *in_R_HTTPOpen(const char *url, const int cacheOK)
+static void *in_R_HTTPOpen(const char *url, const char *headers,
+ const int cacheOK)
{
WIctxt wictxt;
DWORD status, d1 = 4, d2 = 0, d3 = 100;
@@ -622,7 +634,7 @@
wictxt->length = -1;
wictxt->type = NULL;
wictxt->hand =
- InternetOpen("R", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL,
+ InternetOpen(headers, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL,
#ifdef USE_WININET_ASYNC
INTERNET_FLAG_ASYNC
#else
@@ -870,7 +882,8 @@
#endif
#ifndef HAVE_INTERNET
-static void *in_R_HTTPOpen(const char *url, const int cacheOK)
+static void *in_R_HTTPOpen(const char *url, const char *headers,
+ const int cacheOK)
{
return NULL;
}
Index: src/modules/internet/nanohttp.c
===================================================================
--- src/modules/internet/nanohttp.c (revision 38715)
+++ src/modules/internet/nanohttp.c (working copy)
@@ -1034,6 +1034,9 @@
* @contentType: if available the Content-Type information will be
* returned at that location
*
+ * @headers: headers to be used in the HTTP request. These must be name/value
+ * pairs separated by ':', each on their own line.
+ *
* This function try to open a connection to the indicated resource
* via HTTP GET.
*
@@ -1042,10 +1045,11 @@
*/
void*
-RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK)
+RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers,
+ int cacheOK)
{
if (contentType != NULL) *contentType = NULL;
- return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, cacheOK);
+ return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, headers, cacheOK);
}
/**
More information about the R-devel
mailing list