[RsR] Problem re-implementing whimed from package robustbase
Kaveh
k@veh@v@k||| @end|ng |rom w|@@ku|euven@be
Sat Jun 23 21:27:25 CEST 2012
Ok, "for make benefit" of future generations, i've attached to this
mail the ,stand alone' version of qn_sn.c (i've removed sn and 200
loc of bell and whistles to make it shorter).
I guess now i will feed both this and my implementation to a
debugger and see what is causing the strange divergence....
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <inttypes.h>
#include <R.h>
#include <Rmath.h> /* -> <math.h> and much more */
double qn0(double *x, int n);
double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int
*w_cand);
double pull(double *a_in, int n, int k);
int cmp (const void *a, const void *b);
int main();
double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int
*w_cand){
/*
Algorithm to compute the weighted high median in O(n) time.
The whimed is defined as the smallest a[j] such that the sum
of the weights of all a[i] <= a[j] is strictly greater than
half of the total weight.
Arguments:
a: double array containing the observations
n: number of observations
w: array of (int/double) weights of the observations.
*/
int n2, i, kcand,mm=0,vv1=1;
/* sum of weights: `int' do overflow when n ~>= 1e5 */
int wleft, wmid, wright, w_tot, wrest;
double trial,s_apsrt;
w_tot = 0;
for (i = 0; i < n; ++i)
w_tot += w[i];
wrest = 0;
/* REPEAT : */
do {
for (i = 0; i < n; ++i){
a_psrt[i] = a[i];
}
n2 = n/2;/* =^= n/2 +1 with 0-indexing */
trial = pull(a_psrt, n, n2);;
wleft = 0; wmid = 0; wright= 0;
for (i = 0; i < n; ++i) {
if (a[i] < trial) wleft += w[i];
else if (a[i] > trial) wright += w[i];
else wmid += w[i];
}
/* wleft = sum_{i; a[i] < trial} w[i]
* wmid = sum_{i; a[i] == trial} w[i] at least one 'i' since trial is
one a[]!
* wright= sum_{i; a[i] > trial} w[i]
*/
kcand = 0;
if (2 * (wrest + wleft) > w_tot) {
for (i = 0; i < n; ++i) {
if (a[i] < trial) {
a_cand[kcand] = a[i];
w_cand[kcand] = w[i];
++kcand;
}
}
}
else if (2 * (wrest + wleft + wmid) <= w_tot) {
for (i = 0; i < n; ++i) {
if (a[i] > trial) {
a_cand[kcand] = a[i];
w_cand[kcand] = w[i];
++kcand;
}
}
wrest += wleft + wmid;
}
else {
return trial;
}
mm++;
n = kcand;
s_apsrt=0;
for (i = 0;i<n;++i) {
a[i] = a_cand[i];
w[i] = w_cand[i];
s_apsrt += a[i];
}
} while(1);
} /* _WHIMED_ */
/* Main routines for C API */
//double qn(double *x, int n, int finite_corr);
/* these have no extra factors (no consistency factor & finite_corr): */
/* ----------- Implementations -----------------------------------*/
//void Qn0(double *x, Sint *n, double *res) { *res = qn0(x, (int)*n); }
int cmp (const void *pa, const void *pb){
double a = *(const double*)pa;
double b = *(const double*)pb;
return (int) (a - b);
}
double qn0(double *x, int n){
/*--------------------------------------------------------------------
Efficient algorithm for the scale estimator:
Q*_n = { |x_i - x_j|; i<j }_(k) [= Qn without scaling ]
i.e. the k-th order statistic of the |x_i - x_j|
Parameters of the function Qn :
x : double array containing the observations
n : number of observations (n >=2)
*/
double *y = (double *)calloc(n, sizeof(double));
double *work = (double *)calloc(n, sizeof(double));
double *a_psrt = (double *)calloc(n, sizeof(double));
double *a_cand = (double *)calloc(n, sizeof(double));
int *left = (int *)calloc(n, sizeof(int));
int *right = (int *)calloc(n, sizeof(int));
int *p = (int *)calloc(n, sizeof(int));
int *q = (int *)calloc(n, sizeof(int));
int *weight = (int *)calloc(n, sizeof(int));
double trial;
bool found;
double w_tot=0.0;
int h, i, j,jj,jh,vv2=3,ll=0;
/* Following should be `long long int' : they can be of order n^2 */
int64_t k, knew, nl,nr, sump,sumq;
h = n / 2 + 1;
k = (int64_t)h * (h - 1) / 2;
for (i = 0; i < n; ++i) {
y[i] = x[i];
left [i] = n - i + 1;
right[i] = (i <= h) ? n : n - (i - h);
/* the n - (i-h) is from the paper; original code had `n' */
}
// R_qsort(y, 1, n); /* y := sort(x) */
qsort(y,n,sizeof(double),cmp);
nl = (int64_t)n * (n + 1) / 2;
nr = (int64_t)n * n;
knew = k + nl;/* = k + (n+1 \over 2) */
found = FALSE;
#ifdef DEBUG_qn
REprintf("qn0(): h,k= %2d,%2d; nl,nr= %d,%d\n", h,k, nl,nr);
#endif
/* L200: */
while(!found && nr - nl > n) {
j = 0;
/* Truncation to float :
try to make sure that the same values are got later (guard bits !) */
for (i = 1; i < n; ++i) {
if (left[i] <= right[i]) {
weight[j] = right[i] - left[i] + 1;
jh = left[i] + weight[j] / 2;
work[j] = (float)(y[i] - y[n - jh]);
++j;
}
}
trial = whimed_i(work, weight, j, a_cand, a_psrt, /*iw_cand*/ p);
#ifdef DEBUG_qn
REprintf(" ..!found: whimed(");
# ifdef DEBUG_long
REprintf("wrk=c(");
for(i=0; i < j; i++) REprintf("%s%g", (i>0)? ", " : "", work[i]);
REprintf("),\n wgt=c(");
for(i=0; i < j; i++) REprintf("%s%d", (i>0)? ", " : "", weight[i]);
REprintf("), j= %3d) -> trial= %7g\n", j, trial);
# else
REprintf("j=%3d) -> trial= %g:", j, trial);
# endif
#endif
j = 0;
for (i = n - 1; i >= 0; --i) {
while (j < n && ((float)(y[i] - y[n - j - 1])) < trial)
++j;
p[i] = j;
}
#ifdef DEBUG_qn
REprintf(" f_1: j=%2d", j);
#endif
j = n + 1;
for (i = 0; i < n; ++i) {
while ((float)(y[i] - y[n - j + 1]) > trial)
--j;
q[i] = j;
}
sump = 0;
sumq = 0;
for (i = 0; i < n; ++i) {
sump += p[i];
sumq += q[i] - 1;
}
#ifdef DEBUG_qn
REprintf(" f_2 -> j=%2d, sump|q= %lld,%lld", j, sump,sumq);
#endif
if (knew <= sump) {
for (i = 0; i < n; ++i)
right[i] = p[i];
nr = sump;
#ifdef DEBUG_qn
REprintf("knew <= sump =: nr , new right[]\n");
#endif
} else if (knew > sumq) {
for (i = 0; i < n; ++i)
left[i] = q[i];
nl = sumq;
#ifdef DEBUG_qn
REprintf("knew > sumq =: nl , new left[]\n");
#endif
} else { /* sump < knew <= sumq */
found = TRUE;
#ifdef DEBUG_qn
REprintf("sump < knew <= sumq ---> FOUND\n");
#endif
}
ll++;
if(ll>vv2) found=TRUE;
// printf("%lf\n",trial);
} /* while */
if (found)
return trial;
else {
#ifdef DEBUG_qn
REprintf(".. not fnd -> new work[]");
#endif
j = 0;
for (i = 1; i < n; ++i) {
for (jj = left[i]; jj <= right[i]; ++jj) {
work[j] = y[i] - y[n - jj];
j++;
}/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+} */
}
#ifdef DEBUG_qn
REprintf(" of length %d; knew-nl=%d\n", j, knew-nl);
#endif
return (pull(work, j - 1, knew - nl));
// knew -= (nl + 1); /* -1: 0-indexing */
// rPsort(work, j, knew);
// return(work[knew]);
}
} /* qn0 */
double pull(double *a_in, int n, int k){
int j;
double *a, ax;
// char* vmax = vmaxget();
a = (double *)calloc(n, sizeof(double));
/* Copy a[] and use copy since it will be re-shuffled: */
for (j = 0; j < n; j++)
a[j] = a_in[j];
k--; /* 0-indexing */
qsort(a,n,sizeof(double),cmp);
ax = a[k];
// vmaxset(vmax);
return ax;
} /* pull */
int main(){
FILE *inFile;
double x[100],resu;
int i,n=100,numRead=0;
inFile=fopen("test.txt","r");
if (!inFile) return 1;
for(i=0;i<n;i++) numRead=fscanf(inFile,"%lf",&x[i]);
printf("%lf\n",x[0]);
fclose(inFile);
resu=qn0(x,n);
printf("%lf\n",resu);
return 0;
}
More information about the R-SIG-Robust
mailing list