/* Blake LeBaron Dept. Of Economics University Of Wisconsin-Madison July 1988 March 1990 May 1997 August 1999 November 2001 This software is distributed with the understanding that it is free. Users may distribute it to anyone as long as they don't charge anything. Also, the author gives this out without any support or responsibility for errors of any kind. I hope that the distribution of this software will further enhance are understanding of these new measures of dependence. May 1997 revision: Changed k estimators from u to v statistics. June 1997 revision: Added detailed help printout. August 1999 revision: Modified tests to remove only points required to be removed for each dimension, (ie 2 for dim = 3) Also, c1, k, use full n points November 2001 Modified so that epsilon is in units of std */ #include #include /* NBITS is the number of useable bits per word entry. Technically on the sun this should be 32, as the sun uses 4 byte integers. Since the counting algorithm uses a table lookup method we must keep that table reasonable, so only 15 bits are used. This may be changed if space is a problem. */ #define NBITS 15 #define ALLBITS 0xffff #define PREC double #define TABLEN 32767 #define DEBUG 0 /* ----------- grid macro: turn bits on --------------------------- */ #define GRIDON(x,y) \ if(x!=y) { \ if(x>y) { \ ix = y; \ iy = x; \ } \ else { \ ix = x; \ iy = y; \ } \ iy = iy-ix-1; \ ipos = iy / NBITS; \ ibit = NBITS - 1 - (iy % NBITS); \ *(*(start+ix)+ipos) |= bits[ibit];\ } char *calloc(); /* define struct */ struct position { PREC value; int pos; }; /* globals */ static int bits[NBITS], *mask; static short int *grid, **start; void freeall(); double atof(); static int *lookup,first=1; static struct position *postab,*postlast; /* module function definitions */ void genmask(), gridon(), embed(), prhelp(); double evalc(); /* friendly front end - This main program is a friendly front end program that calls the routines to calculate the bds statistic. It allows unix user to: 1.) have an easy to use command imediately 2.) see how to use the calling routines for calculations Users doing montecarlo work will probably want to use the subroutines directly. These routines are: fkc(x,n,k,c1,c,c1s,m,mask,eps) cstat(c,cm,k,c1,m,n) freeall() fkc(x,n,k,c1,c,c1s,m,mask,eps) x = vector of series to test (double *), but it can be modified using the PREC definition. Setting PREC to float or int, will allow the use of other types of series. n = length of series (int) k = returned value of k (double *) (V-statistic estimate) c1 = returned value of c (double *)(V-statistic estimate) c = raw c values c[1],c[2],c[3].... (Note:the correct subscripts are used.) (double *) c1s = c(1) values for series shorted by (j-1) for each c1s(j). These correspond to the equivalent sample lengths for full sample c[j]'s. m = maximum embedding - cstats will calculated for i=1 to m (int) mask = number of points to ignore at the end of the series. Since the calculation of c(2) can effectively use more points then c(3), c(4) ..., often the last several points are ignored so that all statistics are calculated on the same set of points. ie. for m=3 we might only use x(1) through x(n-2) for the calculations of c(2) and c(3). This is generally set to m-1 to allow all c to be estimated on a point set of n-m+1. (int) August 1999 modification: If mask is set to zero the entire sample is used for each dimension in c[j]. Also, c1s uses the entire sample minus the last j-1 points to line up with c[j] (This is probably the best way to use the routine. See below.) eps = epsilon value for close points (double) or set to (PREC). cstat(c,cm,k,c1,m,n) This simple routine calculates the standard error and the normalized bds stat. It closely follows formulas in Brock Hsieh and LeBaron on page 43. c = c[1] c for embedding 1 cm = c[m] c for embedding m k = k stat (v-statistic) c1 = c for embedding 1 (v-statistic) m = embedding n = length of series freeall() The fkc algorithm allocates large amounts of memory. This is time consuming and for montecarlo simulations it is not desirable to reallocate every time. The routine can tell whether it needs to reallocate. For simulations fkc should be called repeatedly. When the program is finally done freeall() should be called to free all the allocated space. This front end module can be removed from the begin front end comment to the end front end comment. The remaining routines can be compiled as a stand alone library to be called by other programs. */ /* begin front end ---------------------------------- */ #define MAX 10000 #define MAXDIM 25 main(argc,argv) int argc; char *argv[]; { int n,m; double *x; int i; FILE *ifile,*fopen(); double k,c[MAXDIM],cstan[MAXDIM],c1s[MAXDIM]; double c1; char *calloc(); double cstat(); double eps; double x1sum, x2sum, epsstd, xstd; void fkc_slow(); void fkc(); if(argc==1) { printf("usage: bdstest ifile m eps(in std units) (-h alone for more help)\n"); exit(0); } if((*argv[1]=='-') && (*(argv[1]+1)=='h')) { prhelp(); exit(0); } /* get command line inputs */ m = atoi(argv[2]); epsstd = atof(argv[3]); x = (double *) calloc(MAX,sizeof(double)); /* read in data */ i = 0; ifile = fopen(argv[1],"r"); while(fscanf(ifile,"%le",&x[i++])!=EOF); fclose(ifile); /* set n to sample size */ n = i-1; /* find standard deviation */ x1sum = 0; x2sum = 0; for (i=0; ivalue = x[i]; (postab+i)->pos = i; } if(DEBUG) printf("sort\n"); qsort((char *)postab,n,sizeof(struct position),comp); postlast = postab+n-1; /* start row by row construction */ /* use theiler method */ if(DEBUG) printf("set grid\n"); count = 0; phi = 0; for(p=postab;p<=postlast;p++) { tcount = 0; pt = p ; /* count to right */ while( (pt->value - p->value)<=eps) { GRIDON(p->pos,pt->pos); if( (p->posposvalue - pt->value)<=eps) { if( (p->pospos 2 ) { for (i = *(start+j);i< *(start+j+1)-2;i++) { count += lookup[*i]; if(lookup[*i]>15) printf("%d %d %d\n", (int)(i-grid),*i,lookup[*i]); } for(i = *(start+j+1)-2;i< *(start+j+1);i++) { count += lookup[ (*i) & mask[j*2+ *(start+j+1)-i-1]]; } } else { for(i = *(start+j);i<*(start+j+1);i++) { count += lookup[ (*i) & mask[j*2+ *(start+j+1)-i-1]]; } } } if(DEBUG) printf("count = %ld\n",count); return ( 2*((double)count)/ (nd*(nd-1))); } int comp(a,b) struct position *a,*b; { if(a->value>b->value) return(1); else if(a->valuevalue) return(-1); else return(0); } /* This function calculates the asymptotic standard error from c and k. It then returns the test statistic which is asymptotically distributed normal(0,1). These formulas can be found in Brock, Hsieh, LeBaron, page 43. */ double cstat(c,cm,k,c1,m,n) double c,cm,k; double c1; int m,n; { double sigma, stat, std, ipow(); int j; sigma = 0; for(j=1;j<=m-1;j++) { sigma += 2.*ipow(k,m-j)*ipow(c1,2*j); } sigma += ipow(k,m) + (m-1)*(m-1)*ipow(c1,2*m) -m*m*k*ipow(c1,2*m-2); sigma *= (double)4; std = sqrt(sigma/((double)n)); stat = (cm - ipow(c,m))/std; return(stat); } double ipow(x,m) double x; int m; { int j; double y; y = 1; for(j=0;jmax) max = x; } return(max); } void prhelp() { printf("Detailed usage:\n\n"); printf("bdstest filename maxdim epsilon\n\n"); printf("filename: File name containing the time series in a single column\n"); printf("separated by carriage returns.\n\n"); printf("maxdim: The maximum dimension, m, to estimate the statistic for.\n"); printf("The program will print bds statistics in the range of 2 to m.\n"); printf("Also, the time series length will be reduced by a factor of m-1.\n"); printf("For example, for a series of length 10, for m=2, the test statistic\n"); printf("can only be estimated over a range of x(1)through x(9).\n"); printf("To make the estimates consistent across tests, the series is stopped\n"); printf("at N less the maximum dimension minus 1, maxdim-1.\n\n"); printf("epsilon: The epsilon parameter is given to the program in\n"); printf("units of the time series. Often in practice this\n"); printf("is set to a fraction of the standard deviation of the time series.\n\n"); printf("The output of the program is a simple vector corresponding to\n"); printf(" w(2), w(3), ..., w(m) \n"); printf("the normalized statistics which are asymptotically distributed normal with\n"); printf("zero mean and unit variance.\n\n"); printf("Note: output may be easily piped to a file or another program.\n"); }