[RsR] Problem re-implementing whimed from package robustbase

Kaveh k@veh@v@k||| @end|ng |rom w|@@ku|euven@be
Sat Jun 23 21:27:25 CEST 2012


Ok, "for make benefit" of future generations, i've attached to this 
mail the ,stand alone' version of qn_sn.c (i've removed sn and 200 
loc of bell and whistles to make it shorter). 

I guess now i will feed both this and my implementation to a 
debugger and see what is causing the strange divergence....

#include <stdlib.h>
#include <stdbool.h> 
#include <stdio.h>   
#include <inttypes.h>
#include <R.h>
#include <Rmath.h> /* -> <math.h> and much more */

double qn0(double *x, int n);
double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int 
*w_cand);
double pull(double *a_in, int n, int k);
int cmp (const void *a, const void *b);
int main();


double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int 
*w_cand){
/*
  Algorithm to compute the weighted high median in O(n) time.

  The whimed is defined as the smallest a[j] such that the sum
  of the weights of all a[i] <= a[j] is strictly greater than
  half of the total weight.

  Arguments:

  a: double array containing the observations
  n: number of observations
  w: array of (int/double) weights of the observations.
*/

    int n2, i, kcand,mm=0,vv1=1;
    /* sum of weights: `int' do overflow when  n ~>= 1e5 */
    int wleft, wmid, wright, w_tot, wrest;

    double trial,s_apsrt;

    w_tot = 0;
    for (i = 0; i < n; ++i)
	w_tot += w[i];
    wrest = 0;

/* REPEAT : */
    do {
	for (i = 0; i < n; ++i){
	    a_psrt[i] = a[i];
	}
	n2 = n/2;/* =^= n/2 +1 with 0-indexing */
	trial = pull(a_psrt, n, n2);;
	wleft = 0;    wmid  = 0;    wright= 0;
	for (i = 0; i < n; ++i) {
	    if (a[i] < trial)	wleft += w[i];
	    else if (a[i] > trial) wright += w[i];
	    else wmid += w[i];
	}
	/* wleft = sum_{i; a[i]	 < trial}  w[i]
	 * wmid	 = sum_{i; a[i] == trial}  w[i] at least one 'i' since trial is 
one a[]!
	 * wright= sum_{i; a[i]	 > trial}  w[i]
	 */
	kcand = 0;
	if (2 * (wrest + wleft) > w_tot) {
	    for (i = 0; i < n; ++i) {
		if (a[i] < trial) {
		    a_cand[kcand] = a[i];
		    w_cand[kcand] = w[i];	
		    ++kcand;
		}
	    }
	}
	else if (2 * (wrest + wleft + wmid) <= w_tot) {
	    for (i = 0; i < n; ++i) {
		if (a[i] > trial) {
		    a_cand[kcand] = a[i];
		    w_cand[kcand] = w[i];	
                    ++kcand;
		}
	    }
	    wrest += wleft + wmid;
	}
	else {
	    return trial;
	}
	mm++;
	n = kcand;
        s_apsrt=0;
	for (i = 0;i<n;++i) {
	    a[i] = a_cand[i];
	    w[i] = w_cand[i];
	    s_apsrt += a[i];
	}

    } while(1);

} /* _WHIMED_ */

/* Main routines for C API */
//double qn(double *x, int n, int finite_corr);
/* these have no extra factors (no consistency factor & finite_corr): */


/* ----------- Implementations -----------------------------------*/

//void Qn0(double *x, Sint *n, double *res) { *res = qn0(x, (int)*n); }

int cmp (const void *pa, const void *pb){
	double a = *(const double*)pa;
   double b = *(const double*)pb;
  return (int) (a - b);
}

double qn0(double *x, int n){
/*--------------------------------------------------------------------

   Efficient algorithm for the scale estimator:

       Q*_n = { |x_i - x_j|; i<j }_(k)	[= Qn without scaling ]

		i.e. the k-th order statistic of the |x_i - x_j|

   Parameters of the function Qn :
       x  : double array containing the observations
       n  : number of observations (n >=2)
 */

    double *y	  = (double *)calloc(n, sizeof(double));
    double *work  = (double *)calloc(n, sizeof(double));
    double *a_psrt = (double *)calloc(n, sizeof(double));
    double *a_cand = (double *)calloc(n, sizeof(double));

    int *left	  = (int *)calloc(n, sizeof(int));
    int *right	  = (int *)calloc(n, sizeof(int));
    int *p	  = (int *)calloc(n, sizeof(int));
    int *q	  = (int *)calloc(n, sizeof(int));
    int *weight	  = (int *)calloc(n, sizeof(int));

    double trial;
    bool found;

    double w_tot=0.0;

    int h, i, j,jj,jh,vv2=3,ll=0;
    /* Following should be `long long int' : they can be of order n^2 */
    int64_t k, knew, nl,nr, sump,sumq;

    h = n / 2 + 1;
    k = (int64_t)h * (h - 1) / 2;
    for (i = 0; i < n; ++i) {
	y[i] = x[i];
	left [i] = n - i + 1;
	right[i] = (i <= h) ? n : n - (i - h);
	/* the n - (i-h) is from the paper; original code had `n' */
    }
  //  R_qsort(y, 1, n); /* y := sort(x) */
    qsort(y,n,sizeof(double),cmp);
    nl = (int64_t)n * (n + 1) / 2;
    nr = (int64_t)n * n;
    knew = k + nl;/* = k + (n+1 \over 2) */
    found = FALSE;
#ifdef DEBUG_qn
    REprintf("qn0(): h,k= %2d,%2d;  nl,nr= %d,%d\n", h,k, nl,nr);
#endif
/* L200: */
    while(!found && nr - nl > n) {
	j = 0;
	/* Truncation to float :
	   try to make sure that the same values are got later (guard bits !) */
	for (i = 1; i < n; ++i) {
	    if (left[i] <= right[i]) {
		weight[j] = right[i] - left[i] + 1;
		jh = left[i] + weight[j] / 2;
		work[j] = (float)(y[i] - y[n - jh]);
		++j;
	    }
	}
	trial = whimed_i(work, weight, j, a_cand, a_psrt, /*iw_cand*/ p);
#ifdef DEBUG_qn
	REprintf(" ..!found: whimed(");
#  ifdef DEBUG_long
	REprintf("wrk=c(");
	for(i=0; i < j; i++) REprintf("%s%g", (i>0)? ", " : "", work[i]);
	REprintf("),\n	   wgt=c(");
	for(i=0; i < j; i++) REprintf("%s%d", (i>0)? ", " : "", weight[i]);
	REprintf("), j= %3d) -> trial= %7g\n", j, trial);
#  else
	REprintf("j=%3d) -> trial= %g:", j, trial);
#  endif
#endif
	j = 0;
	for (i = n - 1; i >= 0; --i) {
	    while (j < n && ((float)(y[i] - y[n - j - 1])) < trial)
		++j;
	    p[i] = j;
	}
#ifdef DEBUG_qn
	REprintf(" f_1: j=%2d", j);
#endif
	j = n + 1;
	for (i = 0; i < n; ++i) {
	    while ((float)(y[i] - y[n - j + 1]) > trial)
		--j;
	    q[i] = j;
	}
	sump = 0;
	sumq = 0;
	for (i = 0; i < n; ++i) {
	    sump += p[i];
	    sumq += q[i] - 1;
	}
#ifdef DEBUG_qn
	REprintf(" f_2 -> j=%2d, sump|q= %lld,%lld", j, sump,sumq);
#endif
	if (knew <= sump) {
	    for (i = 0; i < n; ++i)
		right[i] = p[i];
	    nr = sump;
#ifdef DEBUG_qn
	    REprintf("knew <= sump =: nr , new right[]\n");
#endif
	} else if (knew > sumq) {
	    for (i = 0; i < n; ++i)
		left[i] = q[i];
	    nl = sumq;
#ifdef DEBUG_qn
	    REprintf("knew > sumq =: nl , new left[]\n");
#endif
	} else { /* sump < knew <= sumq */
	    found = TRUE;
#ifdef DEBUG_qn
	    REprintf("sump < knew <= sumq ---> FOUND\n");
#endif
	}
	ll++;
	if(ll>vv2)	found=TRUE;
//	printf("%lf\n",trial);

    } /* while */

    if (found)
	return trial;
    else {
#ifdef DEBUG_qn
	REprintf(".. not fnd -> new work[]");
#endif
	j = 0;
	for (i = 1; i < n; ++i) {
	    for (jj = left[i]; jj <= right[i]; ++jj) {
		work[j] = y[i] - y[n - jj];
		j++;
	    }/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+}  */
	}
#ifdef DEBUG_qn
	REprintf(" of length %d; knew-nl=%d\n", j, knew-nl);
#endif

	return (pull(work, j - 1, knew - nl));
//	knew -= (nl + 1); /* -1: 0-indexing */
//	rPsort(work, j, knew);
//	return(work[knew]);
    }
} /* qn0 */


double pull(double *a_in, int n, int k){
    int j;
    double *a, ax;
  //  char* vmax = vmaxget();
    a = (double *)calloc(n, sizeof(double));
    /* Copy a[] and use copy since it will be re-shuffled: */
    for (j = 0; j < n; j++)
	a[j] = a_in[j];

    k--; /* 0-indexing */
    qsort(a,n,sizeof(double),cmp);   
    ax = a[k];

//    vmaxset(vmax);
    return ax;
} /* pull */
int main(){
	FILE *inFile;
	double x[100],resu;
	int i,n=100,numRead=0;
	inFile=fopen("test.txt","r");
	if (!inFile)		return 1;
	for(i=0;i<n;i++) numRead=fscanf(inFile,"%lf",&x[i]);
        printf("%lf\n",x[0]);
	fclose(inFile);
	resu=qn0(x,n);
        printf("%lf\n",resu);
	return 0;
}




More information about the R-SIG-Robust mailing list