1 /*
2  * Experimental data  distribution table generator
3  * Taken from the uncopyrighted NISTnet code (public domain).
4  *
5  * Read in a series of "random" data values, either
6  * experimentally or generated from some probability distribution.
7  * From this, create the inverse distribution table used to approximate
8  * the distribution.
9  */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <math.h>
13 #include <malloc.h>
14 #include <string.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 
18 
19 double *
readdoubles(FILE * fp,int * number)20 readdoubles(FILE *fp, int *number)
21 {
22 	struct stat info;
23 	double *x;
24 	int limit;
25 	int n=0, i;
26 
27 	if (!fstat(fileno(fp), &info) &&
28 	    info.st_size > 0) {
29 		limit = 2*info.st_size/sizeof(double);	/* @@ approximate */
30 	} else {
31 		limit = 10000;
32 	}
33 
34 	x = calloc(limit, sizeof(double));
35 	if (!x) {
36 		perror("double alloc");
37 		exit(3);
38 	}
39 
40 	for (i=0; i<limit; ++i){
41 		if (fscanf(fp, "%lf", &x[i]) != 1 ||
42 		    feof(fp))
43 			break;
44 		++n;
45 	}
46 	*number = n;
47 	return x;
48 }
49 
50 void
arraystats(double * x,int limit,double * mu,double * sigma,double * rho)51 arraystats(double *x, int limit, double *mu, double *sigma, double *rho)
52 {
53 	int n=0, i;
54 	double sumsquare=0.0, sum=0.0, top=0.0;
55 	double sigma2=0.0;
56 
57 	for (i=0; i<limit; ++i){
58 		sumsquare += x[i]*x[i];
59 		sum += x[i];
60 		++n;
61 	}
62 	*mu = sum/(double)n;
63 	*sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1));
64 
65 	for (i=1; i < n; ++i){
66 		top += ((double)x[i]- *mu)*((double)x[i-1]- *mu);
67 		sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu);
68 
69 	}
70 	*rho = top/sigma2;
71 }
72 
73 /* Create a (normalized) distribution table from a set of observed
74  * values.  The table is fixed to run from (as it happens) -4 to +4,
75  * with granularity .00002.
76  */
77 
78 #define TABLESIZE	16384/4
79 #define TABLEFACTOR	8192
80 #ifndef MINSHORT
81 #define MINSHORT	-32768
82 #define MAXSHORT	32767
83 #endif
84 
85 /* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger
86  * than MAXSHORT, we don't bother looking at a larger domain than this:
87  */
88 #define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1)
89 #define DISTTABLEGRANULARITY 50000
90 #define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2)
91 
92 static int *
makedist(double * x,int limit,double mu,double sigma)93 makedist(double *x, int limit, double mu, double sigma)
94 {
95 	int *table;
96 	int i, index, first=DISTTABLESIZE, last=0;
97 	double input;
98 
99 	table = calloc(DISTTABLESIZE, sizeof(int));
100 	if (!table) {
101 		perror("table alloc");
102 		exit(3);
103 	}
104 
105 	for (i=0; i < limit; ++i) {
106 		/* Normalize value */
107 		input = (x[i]-mu)/sigma;
108 
109 		index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY);
110 		if (index < 0) index = 0;
111 		if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1;
112 		++table[index];
113 		if (index > last)
114 			last = index +1;
115 		if (index < first)
116 			first = index;
117 	}
118 	return table;
119 }
120 
121 /* replace an array by its cumulative distribution */
122 static void
cumulativedist(int * table,int limit,int * total)123 cumulativedist(int *table, int limit, int *total)
124 {
125 	int accum=0;
126 
127 	while (--limit >= 0) {
128 		accum += *table;
129 		*table++ = accum;
130 	}
131 	*total = accum;
132 }
133 
134 static short *
inverttable(int * table,int inversesize,int tablesize,int cumulative)135 inverttable(int *table, int inversesize, int tablesize, int cumulative)
136 {
137 	int i, inverseindex, inversevalue;
138 	short *inverse;
139 	double findex, fvalue;
140 
141 	inverse = (short *)malloc(inversesize*sizeof(short));
142 	for (i=0; i < inversesize; ++i) {
143 		inverse[i] = MINSHORT;
144 	}
145 	for (i=0; i < tablesize; ++i) {
146 		findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN;
147 		fvalue = (double)table[i]/(double)cumulative;
148 		inverseindex = (int)rint(fvalue*inversesize);
149 		inversevalue = (int)rint(findex*TABLEFACTOR);
150 		if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1;
151 		if (inversevalue > MAXSHORT) inversevalue = MAXSHORT;
152 		if (inverseindex >= inversesize) inverseindex = inversesize- 1;
153 
154 		inverse[inverseindex] = inversevalue;
155 	}
156 	return inverse;
157 
158 }
159 
160 /* Run simple linear interpolation over the table to fill in missing entries */
161 static void
interpolatetable(short * table,int limit)162 interpolatetable(short *table, int limit)
163 {
164 	int i, j, last, lasti = -1;
165 
166 	last = MINSHORT;
167 	for (i=0; i < limit; ++i) {
168 		if (table[i] == MINSHORT) {
169 			for (j=i; j < limit; ++j)
170 				if (table[j] != MINSHORT)
171 					break;
172 			if (j < limit) {
173 				table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti);
174 			} else {
175 				table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti);
176 			}
177 		} else {
178 			last = table[i];
179 			lasti = i;
180 		}
181 	}
182 }
183 
184 static void
printtable(const short * table,int limit)185 printtable(const short *table, int limit)
186 {
187 	int i;
188 
189 	printf("# This is the distribution table for the experimental distribution.\n");
190 
191 	for (i=0 ; i < limit; ++i) {
192 		printf("%d%c", table[i],
193 		       (i % 8) == 7 ? '\n' : ' ');
194 	}
195 }
196 
197 int
main(int argc,char ** argv)198 main(int argc, char **argv)
199 {
200 	FILE *fp;
201 	double *x;
202 	double mu, sigma, rho;
203 	int limit;
204 	int *table;
205 	short *inverse;
206 	int total;
207 
208 	if (argc > 1) {
209 		if (!(fp = fopen(argv[1], "r"))) {
210 			perror(argv[1]);
211 			exit(1);
212 		}
213 	} else {
214 		fp = stdin;
215 	}
216 	x = readdoubles(fp, &limit);
217 	if (limit <= 0) {
218 		fprintf(stderr, "Nothing much read!\n");
219 		exit(2);
220 	}
221 	arraystats(x, limit, &mu, &sigma, &rho);
222 #ifdef DEBUG
223 	fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n",
224 		limit, mu, sigma, rho);
225 #endif
226 
227 	table = makedist(x, limit, mu, sigma);
228 	free((void *) x);
229 	cumulativedist(table, DISTTABLESIZE, &total);
230 	inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total);
231 	interpolatetable(inverse, TABLESIZE);
232 	printtable(inverse, TABLESIZE);
233 	return 0;
234 }
235