[RsR] Problem re-implementing whimed from package robustbase
Kaveh Vakili
k@veh@v@k||| @end|ng |rom w|@@ku|euven@be
Fri Jun 29 14:55:34 CEST 2012
Replacing
w_tot=w.sum();
by
w_tot=w.head(n).sum();
in the whimed function did the trick.
(~a classic)
On 06/24/2012 11:05 PM, Andreas Alfons wrote:
> I only had a quick look at the code and did not have time to try it
> myself, but you made many more changes to the code besides swapping
> the C arrays to C++ Eigen data types.
>
> I would suggest one of the following approaches to go further:
>
> 1) The original C code has been used for a long time and is proved to
> be reliable. If you need to use the EIgen data types for your
> application, you could simply write a wrapper function.
>
> 2) If you really need to re-implement it, I would start by changing
> only as little as necessary to make it work with the Eigen data types.
> If you first have something that works, you can then incrementally make
> more changes to take advantage of the Eigen library.
>
> - Andreas
>
>
> On Sat, Jun 23, 2012 at 9:27 PM, Kaveh Vakili
> <Kaveh.Vakili using wis.kuleuven.be> wrote:
>> Ok, "for make benefit" of future generations, i've attached to this
>> mail the ,stand alone' version of qn_sn.c (i've removed sn and 200
>> loc of bell and whistles to make it shorter).
>>
>> I guess now i will feed both this and my implementation to a
>> debugger and see what is causing the strange divergence....
>>
>> #include <stdlib.h>
>> #include <stdbool.h>
>> #include <stdio.h>
>> #include <inttypes.h>
>> #include <R.h>
>> #include <Rmath.h> /* -> <math.h> and much more */
>>
>> double qn0(double *x, int n);
>> double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int
>> *w_cand);
>> double pull(double *a_in, int n, int k);
>> int cmp (const void *a, const void *b);
>> int main();
>>
>>
>> double whimed_i(double *a, int *w, int n, double* a_cand, double *a_psrt, int
>> *w_cand){
>> /*
>> Algorithm to compute the weighted high median in O(n) time.
>>
>> The whimed is defined as the smallest a[j] such that the sum
>> of the weights of all a[i] <= a[j] is strictly greater than
>> half of the total weight.
>>
>> Arguments:
>>
>> a: double array containing the observations
>> n: number of observations
>> w: array of (int/double) weights of the observations.
>> */
>>
>> int n2, i, kcand,mm=0,vv1=1;
>> /* sum of weights: `int' do overflow when n ~>= 1e5 */
>> int wleft, wmid, wright, w_tot, wrest;
>>
>> double trial,s_apsrt;
>>
>> w_tot = 0;
>> for (i = 0; i < n; ++i)
>> w_tot += w[i];
>> wrest = 0;
>>
>> /* REPEAT : */
>> do {
>> for (i = 0; i < n; ++i){
>> a_psrt[i] = a[i];
>> }
>> n2 = n/2;/* =^= n/2 +1 with 0-indexing */
>> trial = pull(a_psrt, n, n2);;
>> wleft = 0; wmid = 0; wright= 0;
>> for (i = 0; i < n; ++i) {
>> if (a[i] < trial) wleft += w[i];
>> else if (a[i] > trial) wright += w[i];
>> else wmid += w[i];
>> }
>> /* wleft = sum_{i; a[i] < trial} w[i]
>> * wmid = sum_{i; a[i] == trial} w[i] at least one 'i' since trial is
>> one a[]!
>> * wright= sum_{i; a[i] > trial} w[i]
>> */
>> kcand = 0;
>> if (2 * (wrest + wleft) > w_tot) {
>> for (i = 0; i < n; ++i) {
>> if (a[i] < trial) {
>> a_cand[kcand] = a[i];
>> w_cand[kcand] = w[i];
>> ++kcand;
>> }
>> }
>> }
>> else if (2 * (wrest + wleft + wmid) <= w_tot) {
>> for (i = 0; i < n; ++i) {
>> if (a[i] > trial) {
>> a_cand[kcand] = a[i];
>> w_cand[kcand] = w[i];
>> ++kcand;
>> }
>> }
>> wrest += wleft + wmid;
>> }
>> else {
>> return trial;
>> }
>> mm++;
>> n = kcand;
>> s_apsrt=0;
>> for (i = 0;i<n;++i) {
>> a[i] = a_cand[i];
>> w[i] = w_cand[i];
>> s_apsrt += a[i];
>> }
>>
>> } while(1);
>>
>> } /* _WHIMED_ */
>>
>> /* Main routines for C API */
>> //double qn(double *x, int n, int finite_corr);
>> /* these have no extra factors (no consistency factor & finite_corr): */
>>
>>
>> /* ----------- Implementations -----------------------------------*/
>>
>> //void Qn0(double *x, Sint *n, double *res) { *res = qn0(x, (int)*n); }
>>
>> int cmp (const void *pa, const void *pb){
>> double a = *(const double*)pa;
>> double b = *(const double*)pb;
>> return (int) (a - b);
>> }
>>
>> double qn0(double *x, int n){
>> /*--------------------------------------------------------------------
>>
>> Efficient algorithm for the scale estimator:
>>
>> Q*_n = { |x_i - x_j|; i<j }_(k) [= Qn without scaling ]
>>
>> i.e. the k-th order statistic of the |x_i - x_j|
>>
>> Parameters of the function Qn :
>> x : double array containing the observations
>> n : number of observations (n >=2)
>> */
>>
>> double *y = (double *)calloc(n, sizeof(double));
>> double *work = (double *)calloc(n, sizeof(double));
>> double *a_psrt = (double *)calloc(n, sizeof(double));
>> double *a_cand = (double *)calloc(n, sizeof(double));
>>
>> int *left = (int *)calloc(n, sizeof(int));
>> int *right = (int *)calloc(n, sizeof(int));
>> int *p = (int *)calloc(n, sizeof(int));
>> int *q = (int *)calloc(n, sizeof(int));
>> int *weight = (int *)calloc(n, sizeof(int));
>>
>> double trial;
>> bool found;
>>
>> double w_tot=0.0;
>>
>> int h, i, j,jj,jh,vv2=3,ll=0;
>> /* Following should be `long long int' : they can be of order n^2 */
>> int64_t k, knew, nl,nr, sump,sumq;
>>
>> h = n / 2 + 1;
>> k = (int64_t)h * (h - 1) / 2;
>> for (i = 0; i < n; ++i) {
>> y[i] = x[i];
>> left [i] = n - i + 1;
>> right[i] = (i <= h) ? n : n - (i - h);
>> /* the n - (i-h) is from the paper; original code had `n' */
>> }
>> // R_qsort(y, 1, n); /* y := sort(x) */
>> qsort(y,n,sizeof(double),cmp);
>> nl = (int64_t)n * (n + 1) / 2;
>> nr = (int64_t)n * n;
>> knew = k + nl;/* = k + (n+1 \over 2) */
>> found = FALSE;
>> #ifdef DEBUG_qn
>> REprintf("qn0(): h,k= %2d,%2d; nl,nr= %d,%d\n", h,k, nl,nr);
>> #endif
>> /* L200: */
>> while(!found && nr - nl > n) {
>> j = 0;
>> /* Truncation to float :
>> try to make sure that the same values are got later (guard bits !) */
>> for (i = 1; i < n; ++i) {
>> if (left[i] <= right[i]) {
>> weight[j] = right[i] - left[i] + 1;
>> jh = left[i] + weight[j] / 2;
>> work[j] = (float)(y[i] - y[n - jh]);
>> ++j;
>> }
>> }
>> trial = whimed_i(work, weight, j, a_cand, a_psrt, /*iw_cand*/ p);
>> #ifdef DEBUG_qn
>> REprintf(" ..!found: whimed(");
>> # ifdef DEBUG_long
>> REprintf("wrk=c(");
>> for(i=0; i < j; i++) REprintf("%s%g", (i>0)? ", " : "", work[i]);
>> REprintf("),\n wgt=c(");
>> for(i=0; i < j; i++) REprintf("%s%d", (i>0)? ", " : "", weight[i]);
>> REprintf("), j= %3d) -> trial= %7g\n", j, trial);
>> # else
>> REprintf("j=%3d) -> trial= %g:", j, trial);
>> # endif
>> #endif
>> j = 0;
>> for (i = n - 1; i >= 0; --i) {
>> while (j < n && ((float)(y[i] - y[n - j - 1])) < trial)
>> ++j;
>> p[i] = j;
>> }
>> #ifdef DEBUG_qn
>> REprintf(" f_1: j=%2d", j);
>> #endif
>> j = n + 1;
>> for (i = 0; i < n; ++i) {
>> while ((float)(y[i] - y[n - j + 1]) > trial)
>> --j;
>> q[i] = j;
>> }
>> sump = 0;
>> sumq = 0;
>> for (i = 0; i < n; ++i) {
>> sump += p[i];
>> sumq += q[i] - 1;
>> }
>> #ifdef DEBUG_qn
>> REprintf(" f_2 -> j=%2d, sump|q= %lld,%lld", j, sump,sumq);
>> #endif
>> if (knew <= sump) {
>> for (i = 0; i < n; ++i)
>> right[i] = p[i];
>> nr = sump;
>> #ifdef DEBUG_qn
>> REprintf("knew <= sump =: nr , new right[]\n");
>> #endif
>> } else if (knew > sumq) {
>> for (i = 0; i < n; ++i)
>> left[i] = q[i];
>> nl = sumq;
>> #ifdef DEBUG_qn
>> REprintf("knew > sumq =: nl , new left[]\n");
>> #endif
>> } else { /* sump < knew <= sumq */
>> found = TRUE;
>> #ifdef DEBUG_qn
>> REprintf("sump < knew <= sumq ---> FOUND\n");
>> #endif
>> }
>> ll++;
>> if(ll>vv2) found=TRUE;
>> // printf("%lf\n",trial);
>>
>> } /* while */
>>
>> if (found)
>> return trial;
>> else {
>> #ifdef DEBUG_qn
>> REprintf(".. not fnd -> new work[]");
>> #endif
>> j = 0;
>> for (i = 1; i < n; ++i) {
>> for (jj = left[i]; jj <= right[i]; ++jj) {
>> work[j] = y[i] - y[n - jj];
>> j++;
>> }/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+} */
>> }
>> #ifdef DEBUG_qn
>> REprintf(" of length %d; knew-nl=%d\n", j, knew-nl);
>> #endif
>>
>> return (pull(work, j - 1, knew - nl));
>> // knew -= (nl + 1); /* -1: 0-indexing */
>> // rPsort(work, j, knew);
>> // return(work[knew]);
>> }
>> } /* qn0 */
>>
>>
>> double pull(double *a_in, int n, int k){
>> int j;
>> double *a, ax;
>> // char* vmax = vmaxget();
>> a = (double *)calloc(n, sizeof(double));
>> /* Copy a[] and use copy since it will be re-shuffled: */
>> for (j = 0; j < n; j++)
>> a[j] = a_in[j];
>>
>> k--; /* 0-indexing */
>> qsort(a,n,sizeof(double),cmp);
>> ax = a[k];
>>
>> // vmaxset(vmax);
>> return ax;
>> } /* pull */
>> int main(){
>> FILE *inFile;
>> double x[100],resu;
>> int i,n=100,numRead=0;
>> inFile=fopen("test.txt","r");
>> if (!inFile) return 1;
>> for(i=0;i<n;i++) numRead=fscanf(inFile,"%lf",&x[i]);
>> printf("%lf\n",x[0]);
>> fclose(inFile);
>> resu=qn0(x,n);
>> printf("%lf\n",resu);
>> return 0;
>> }
>>
>> _______________________________________________
>> R-SIG-Robust using r-project.org mailing list
>> https://stat.ethz.ch/mailman/listinfo/r-sig-robust
>
>
More information about the R-SIG-Robust
mailing list