// RCS/CVS "$Id$" keyword string recording the revision of this file.
// It is not referenced by any code visible in this file.
static char rcsid[] = "$Id: ResultTable.cc,v 1.16 2000/01/19 21:55:13 bmah Exp $";
//
// $Id: ResultTable.cc,v 1.16 2000/01/19 21:55:13 bmah Exp $
//
// ResultTable.cc
// Bruce A. Mah <bmah@ca.sandia.gov>
//
// This work was first produced by an employee of Sandia National
// Laboratories under a contract with the U.S. Department of Energy.
// Sandia National Laboratories dedicates whatever right, title or
// interest it may have in this software to the public. Although no
// license from Sandia is needed to copy and use this software,
// copying and using the software might infringe the rights of
// others. This software is provided as-is. SANDIA DISCLAIMS ANY
// WARRANTY OF ANY KIND, EXPRESS OR IMPLIED.
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "pc.h"
#include "ResultTable.h"
#include "Kendall.h"

//
// ResultTable::ResultTable
//
// Input:  Table parameters: inc (packet size increment, must be
// nonzero because it is used as a divisor), m (MTU, the upper limit
// on packet sizes held by the table), r (maximum number of results
// that can be stored for each packet size)
//
// Output:  None
//
// Allocate a table with m/inc columns, each with room for r results.
// Every column starts out empty and all result caches start out
// invalid.
//
ResultTable::ResultTable(int inc, int m, int r) : 
    increment(inc), mtu(m), repetitions(r), columns(m/inc)
{
    // NB:  columns is computed from the constructor arguments rather
    // than from the mtu and increment members.  Members are
    // initialized in the order they are declared in the class, not in
    // the order written above, so reading other members here could
    // pick up uninitialized values.

    int i;

    // Typedef hack for the SparcWorks C++ compiler, which apparently
    // can't handle "new (footype *)[bar]".  We're trying to do:
    // data = new (double *) [columns];
    typedef double *DoublePtr;
    data = new DoublePtr[columns];
    used = new int[columns];
    for (i = 0; i < columns; i++) {
        data[i] = new double[repetitions];
        used[i] = 0;
    }

    // Invalidate result caches
    cacheSlrValid = false;
    cacheTauValid = false;
    cacheQueueingValid = false;
}

//
// ResultTable::~ResultTable
//
// Input:  None
//
// Output:  None
//
// Release the per-column result arrays, then the array of column
// pointers and the per-column usage counters.
//
ResultTable::~ResultTable() {
    int i;

    for (i = 0; i < columns; i++) {
        delete [] data[i];
    }

    // Both of these were allocated with new[], so they must be
    // released with delete[] (plain delete would be undefined
    // behavior).
    delete [] data;
    delete [] used;
}

//
// ResultTable::put
//
// Input:  size, time pair
//
// Output:  Success code in return value (0 on success, negative if
// an error).  Errors are:  size is not a multiple of the increment,
// size falls outside the range covered by the table, or the column
// for that size is already full.
//
// Insert a new result into the table.  A successful insert
// invalidates all cached statistics, since they no longer reflect
// the table contents.
//
int ResultTable::put(int size, double time) {
    int offset;

    // Make sure the size is an integer multiple of the increment size
    if (size % increment) {
        return -1;
    }

    // Is the offset within the proper range for the table?
    // (Column 0 holds results for size == increment.)
    offset = (size/increment)-1;
    if ((offset < 0) || (offset >= columns)) {
        return -1;
    }

    // Any room left for more results in this column?
    if (used[offset] >= repetitions) {
        return -1;
    }

    // Store data
    data[offset][used[offset]++] = time;

    // Invalidate result caches.  This has to happen before we
    // return, otherwise later calls to slr()/queueing() would keep
    // handing back results computed from the old contents.
    cacheSlrValid = false;
    cacheTauValid = false;
    cacheQueueingValid = false;

    return 0;
}

//
// ResultTable::getMin
//
// Input:  None
//
// Output:  Pointer to a newly allocated ResultTable (NULL if an error)
//
// Build a single-repetition table in which every column that holds
// data in this table contains that column's smallest value.  Columns
// without data stay empty in the result.
//
ResultTable *ResultTable::getMin() {

    // One slot per column is enough, since only the minimum is kept.
    ResultTable *minima = new ResultTable(increment, mtu, 1);
    if (minima == NULL) {
        return NULL;
    }

    // Walk the packet sizes, skipping the ones with no results.
    for (int col = 0; col < columns; col++) {
        const int count = used[col];
        if (!count) {
            continue;
        }

        // Scan this column for its smallest entry.
        const double *samples = data[col];
        double smallest = samples[0];
        for (int k = 1; k < count; k++) {
            if (samples[k] < smallest) {
                smallest = samples[k];
            }
        }

        // Written straight into the new table's storage, bypassing
        // the put() accessor.
        minima->data[col][0] = smallest;
        minima->used[col]++;
    }

    return minima;
}

//
// ResultTable::queueing
//
// Input:  None
//
// Output:  Average queueing delay for this dataset (in return
// value).  If there are no data points in this table, the result
// is 0.0.
//
// Compute the average queueing delay for this dataset.  For each
// column, every data point contributes its difference from the
// column minimum; the result is the mean of those differences over
// all data points in the table.  The result is cached until the
// cache is invalidated.
//
// XXX we might want some better statistics too, such as getting
// a confidence interval.
//
double ResultTable::queueing()
{

    // If we've cached a queueing figure, then just return it.
    // (The debug trace must come before the return to be reachable.)
    if (cacheQueueingValid) {
        IF_DEBUG(1, fprintf(stderr, "ResultTable::queueing(): cache hit\n"));
        return cacheQueueing;
    }

    // Results not valid, need to compute them.
    int i, j;
    double sigmaY = 0.0;	// sum of differences from column minima
    int n = 0;			// number of data points seen

    // Loop over columns
    for (i = 0; i < columns; i++) {

        // Only the ones with data points
        if (used[i] > 0) {

            double min;
            double y;

            // Find the minimum data point for this column
            min = data[i][0];
            for (j = 1; j < used[i]; j++) {
                if (data[i][j] < min) {
                    min = data[i][j];
                }
            }

            // Now compute the difference between each data
            // point and the minimum and add it to the sum.
            //
            // NB:  There are faster ways to get this result,
            // but we do it this way so that we can get access
            // to the individual data points, for example to
            // compute some other statistics on them.
            for (j = 0; j < used[i]; j++) {
                y = data[i][j] - min;
                sigmaY += y;
                n++;
            }
        }
    }

    // Average over all points; an empty table yields 0.0.
    if (n > 0) {
        cacheQueueing = sigmaY / n;
    }
    else {
        cacheQueueing = 0.0;
    }
    cacheQueueingValid = true;
    return cacheQueueing;
}

//
// ResultTable::diff
//
// Input:  Another ResultTable (t2) to subtract from this one.
//
// Output:  Newly allocated ResultTable holding, for every column
// that has a value in both tables, this table's value minus t2's
// value.  NULL if t2 is NULL, if the two tables differ in mtu,
// increment or column count, if any column of either table holds
// more than one value, or if the new table could not be allocated.
//
ResultTable *ResultTable::diff(ResultTable *t2)
{
    // Nothing to subtract
    if (t2 == NULL) {
        return NULL;
    }

    // Both tables have to be laid out identically
    const bool sameShape = (mtu == t2->mtu) &&
        (increment == t2->increment) &&
        (columns == t2->columns);
    if (!sameShape) {
        return NULL;
    }

    // A columnwise difference only makes sense when each column
    // carries at most one value, in both tables.
    int col;
    for (col = 0; col < columns; col++) {
        if ((used[col] > 1) || (t2->used[col] > 1)) {
            return NULL;
        }
    }

    // Inputs look sane, so get a single-repetition table for the result.
    ResultTable *result = new ResultTable(increment, mtu, 1);
    if (result == NULL) {
        return NULL;
    }

    // Subtract wherever both tables have a value; other columns are
    // left empty in the result.
    for (col = 0; col < columns; col++) {
        if ((used[col] <= 0) || (t2->used[col] <= 0)) {
            continue;
        }
        result->data[col][0] = data[col][0] - t2->data[col][0];
        result->used[col]++;
    }

    return result;
}


//
// ResultTable::slr
//
// Input:  None
//
// Output:  SLR parameters (a and b, where a is the linear constant
// and b is the X coefficient), coefficient of determination R2,
// standard deviation of parameters sa and sb.  If the table holds
// fewer than three data points, or all of its data points are in a
// single column (one X value), every output is set to 0.0.
//
// Compute simple linear regression for all data points, based on
// a least-squares algorithm as described by
// text in Chapter 14 of "The Art of Computer Systems Performance
// Analysis", R. Jain, 1991.  X is the packet size of a column and
// Y is a result stored in that column.  Successfully computed
// results are cached until the cache is invalidated.
//
void ResultTable::slr(double &a, double &b, double &R2, double &sa, double &sb)
{

    // If cached results valid, use them
    if (cacheSlrValid) {
        a = cacheSlrA;
        b = cacheSlrB;
        R2 = cacheSlrR2;
        sa = cacheSlrSA;
        sb = cacheSlrSB;

        IF_DEBUG(1, fprintf(stderr, "ResultTable::slr(): cache hit\n"));

        return;
    }

    // Compute results
    double sigmaX = 0.0, sigmaY = 0.0, 
        sigmaXY = 0.0, 
        sigmaX2 = 0.0, sigmaY2 = 0.0;
    double Xbar, Ybar;
    double Sxx;
    double b0, b1;
    double SSY, SS0, SST, SSE, SSR;
    double se;
    int n = 0;			// total number of data points
    int populated = 0;		// number of columns holding data
    int i, j;

    // Iterate over columns
    for (i = 0; i < columns; i++) {

        if (used[i] > 0) {
            populated++;
        }

        // Packet size represented by this column
        double X = (double) (i+1)*increment;

        // Iterate over points within a column
        for (j = 0; j < used[i]; j++) {

            double Y = data[i][j];

            sigmaX += X;
            sigmaY += Y;
            sigmaXY += (X*Y);
            sigmaX2 += (X*X);
            sigmaY2 += (Y*Y);
            n++;
        }

    }

    // We need at least three datapoints (the error variance below
    // divides by n-2) spread over at least two different X values
    // (otherwise the slope denominator is zero).  If we don't have
    // that, return something that, while bogus, at least makes a
    // little sense, to avoid getting divide-by-zero situations.
    if ((n < 3) || (populated < 2)) {
        a = 0.0;
        b = 0.0;
        R2 = 0.0;
        sa = 0.0;
        sb = 0.0;
        return;
    }

    Xbar = sigmaX / n;
    Ybar = sigmaY / n;

    // Sum of squared deviations of X from its mean
    Sxx = sigmaX2 - (n * Xbar * Xbar);

    // b1 = b, b0 = a
    b1 = (sigmaXY - (n * Xbar * Ybar)) / Sxx;
    b0 = Ybar - b1 * Xbar;

    // Compute variation
    SSY = sigmaY2;
    SS0 = n * (Ybar * Ybar);
    SST = SSY - SS0;
    SSE = sigmaY2 - (b0 * sigmaY) - (b1 * sigmaXY);

    // SSE is a sum of squares and cannot really be negative, but
    // rounding can push it slightly below zero for a near-perfect
    // fit, which would turn the square root below into a NaN.
    if (SSE < 0.0) {
        SSE = 0.0;
    }
    SSR = SST - SSE;

    // Compute regression parameters
    a = b0;
    b = b1;

    // Compute coefficient of determination
    R2 = SSR/SST;

    // Compute standard deviation of errors
    se = sqrt(SSE/(n-2));

    // Compute Standard deviation of parameters.  Note 1.0/n, not
    // 1/n:  the latter is integer division and is zero for n > 1.
    sa = se * sqrt( (1.0/n) + ((Xbar * Xbar) / Sxx));
    sb = se / sqrt(Sxx);

    // Cache results for later
    cacheSlrA = a;
    cacheSlrB = b;
    cacheSlrR2 = R2;
    cacheSlrSA = sa;
    cacheSlrSB = sb;
    cacheSlrValid = true;
}

//
// ResultTable::tau
//
// Input:  None
//
// Output:  Linear regression parameters (a and b, where a is the
// linear constant and b is the X coefficient), and the lower and
// upper bounds (blower, bupper) of a confidence interval for b.
// If the table holds fewer than two data points, or no pair of data
// points has distinct X values, every output is set to 0.0.
//
// Compute linear fit based on Kendall's tau statistic, as described
// in "Practical Nonparametric Statistics", Third Edition, W. J. Conover, 
// 1999, p. 335.  The slope is the median of the slopes between all
// pairs of data points with distinct X values; the intercept is
// chosen so that the line passes through the median X and median Y.
//
// NB:  Nothing in this function stores its results, so there is no
// cached answer to return; the fit is recomputed on every call.
//
void ResultTable::tau(double &a, double &b, double &blower, double &bupper)
{
    int col, item;			// table cursors
    unsigned int maxValues;		// how many values in the table?
    unsigned int maxSlopes;		// maximum number of slopes to compute
    unsigned int numValues;		// values copied out of the table
    unsigned int numSlopes;		// actual number of slopes found
    unsigned int p, q;			// indices of a pair of data points

    // Count the data points in the table
    maxValues = 0;
    for (col = 0; col < columns; col++) {
        maxValues += used[col];
    }

    // If less than two values we can't compute a regression,
    // so give up.
    if (maxValues < 2) {
        a = 0.0;
        b = 0.0;
        blower = 0.0;
        bupper = 0.0;
        return;
    }

    // One slope per unordered pair of data points, at most
    maxSlopes = maxValues * (maxValues - 1) / 2;

    double *slopes = new double[maxSlopes];
    double *xvalues = new double[maxValues];
    double *yvalues = new double[maxValues];

    // Treat the data points as being in a single, 1-D array,
    // rather than being in a set of 1-D arrays of variable
    // sizes:  copy them out column by column.
    numValues = 0;
    for (col = 0; col < columns; col++) {
        for (item = 0; item < used[col]; item++) {
            xvalues[numValues] = (double)column2size(col);
            yvalues[numValues] = data[col][item];
            numValues++;
        }
    }

    // Compute the slope between every point and each point that
    // follows it in the flattened sequence.
    numSlopes = 0;
    for (p = 0; p < numValues; p++) {
        for (q = p + 1; q < numValues; q++) {

            double xx, xy, yx, yy;
            xx = xvalues[p];
            xy = yvalues[p];
            yx = xvalues[q];
            yy = yvalues[q];

            // Try to avoid divide-by-zero errors
            if (yx != xx) {
                double slope = (yy-xy) / (yx-xx);
                slopes[numSlopes++] = slope;
            }
            else {
                fprintf(stderr, "Warning:  Duplicate x values (%f,%f) = (%f,%f)\n", xx, xy, yx, yy);
            }
        }
    }

    // If we had to throw away points because of duplicate X
    // values, this could throw our confidence intervals off.
    if (numSlopes != maxSlopes) {
        fprintf(stderr, "Warning: duplicate X values forced discarding of data points\n");
    }

    // With no usable slopes at all (every point has the same X)
    // there is nothing to take a median of, so give up.
    if (numSlopes == 0) {
        a = 0.0;
        b = 0.0;
        blower = 0.0;
        bupper = 0.0;
        delete [] slopes;
        delete [] xvalues;
        delete [] yvalues;
        return;
    }

    // Compute slope.  As a side effect, median() sorts slopes
    // into ascending order, which the confidence interval
    // computation below relies on.
    b = median(slopes, numSlopes);

    // Compute intercept
    double xmedian, ymedian;
    xmedian = median(xvalues, numValues);
    ymedian = median(yvalues, numValues);
    a = ymedian - b * xmedian;

    // Compute confidence interval on slope
    unsigned int T;
    T = Kendall::T(numValues, KendallP950);	// 90% confidence for now

    // The bounds are order statistics of the sorted slopes.  The
    // indices are computed in signed arithmetic and clamped to the
    // array, because with few slopes (or a large T) they fall
    // outside it, and the unsigned versions would wrap around.
    const long count = static_cast<long>(numSlopes);
    long r = (count - static_cast<long>(T)) / 2 - 1;
    long s = (count + static_cast<long>(T) + 1) / 2;
    if (r < 0) {
        r = 0;
    }
    if (s > count - 1) {
        s = count - 1;
    }

    // r <= s and slopes is ascending, so slopes[r] is the lower
    // bound and slopes[s] the upper bound.
    blower = slopes[r];
    bupper = slopes[s];

    delete [] slopes;
    delete [] xvalues;
    delete [] yvalues;
}

//
// ResultTable::median
//
// Input:  Array of doubles (values) and the number of elements in
// it (numValues)
//
// Output:  Median value (0.0 if the array is NULL or empty)
//
// Compute the median of an array of doubles.  For an even number of
// elements, the median is the mean of the two middle elements.
// As a side effect, the input array is sorted into ascending order.
// 
double ResultTable::median(double *values, unsigned int numValues)
{
    double medianValue;

    // Nothing to take the median of.  Without this check, the
    // even-count branch below would read values[0] and values[-1].
    if ((values == NULL) || (numValues == 0)) {
        return 0.0;
    }

    // Sort the values using qsort(3).
    extern int doublecomp(const void *a, const void *b);
    qsort((void *) values, numValues, sizeof(double), doublecomp);

    // Find median value.
    if (numValues & 1) {
        // Odd number of samples:  take the middle one
        medianValue = values[(numValues-1)/2];
    }
    else {
        // Even number of samples:  average the two middle ones
        medianValue = (values[(numValues/2)] + values[(numValues/2)-1]) /
            2.0;
    }
    return medianValue;
}

// Comparison callback for qsort(3), ordering two doubles.  Used by
// the qsort call in ResultTable::median.  Returns 0 when the two
// values compare equal, -1 when the first is smaller than the
// second, and 1 in every other case.
int doublecomp(const void *a, const void *b) 
{
    const double lhs = *static_cast<const double *>(a);
    const double rhs = *static_cast<const double *>(b);

    if (lhs < rhs) {
        return -1;
    }
    return (lhs == rhs) ? 0 : 1;
}


//
// ResultTable::Print
//
// Input:  file pointer to print to, tag string, hop number
//
// Output:  Success code (always 0)
//
// Write every stored result to fp, one per line, as
// "<tag> <hop> <packet size> <value>".  Results are written column
// by column, in the order they are stored within each column.
//
int ResultTable::Print(FILE *fp, char *tag, int hop)
{
    for (int col = 0; col < columns; ++col) {

        // Walk the filled part of this column
        const double *sample = data[col];
        const double *const end = sample + used[col];

        for (; sample < end; ++sample) {
            fprintf(fp, "%s %d %d %f\n", tag, hop, column2size(col), 
                    *sample);
        }
    }

    return 0;
}



