/*--------------------------------------------------------------------
 *    The GMT-system:	@(#)grdhisteq.c	2.18  3/13/95
 *
 *    Copyright (c) 1991-1995 by P. Wessel and W. H. F. Smith
 *    See README file for copying and redistribution conditions.
 *--------------------------------------------------------------------*/
/*
 * read a grdfile and find the values which divide its range
 * into n_cell number of quantiles.
 *
 * Author:	W.H.F. Smith
 * Date: 	31 May 1990
 *
 * Modified:	12 June, 1990 by whf smith, adding [-Q] option for
 *	quadratic scaling.  Some rgb color systems consider that
 *	if black = (0,0,0) and white = (1,1,1) or (255,255,255),
 *	then a neutral gray "halfway" between black and white should
 *	be set to gray = (0.75,0.75,0.75) or (191,191,191).  If so,
 *	let 0 <= x <= 1 be the desired gradation between black and
 *	white (the intensity factor used by the coloring program).
 *	Then the gray tone level 0 <= y <= 1 is given by:
 *		y = 2*x - x**2.
 *	Using the -Q option will find the data values which divide
 *	the data range into <n_cells> values of y; default linear
 *	scaling will find the values for <n_cells> divisions of x.
 *
 * Updated to v2.0 15-May-1991-1995 Paul Wessel
 */
 
#include "gmt.h"

/* One grid node as handled by do_gaussian: a value paired with the node's
 * position, so nodes can be sorted by value and afterwards put back in
 * their original order. */
struct	INDEXED_DATA {
	float	x;	/* Node value; for non-NaN nodes do_gaussian replaces it with the normal score */
	int	i;	/* Position of the node in the grid data array */
}	*indexed_data;	/* Array with one entry per node, allocated and freed in do_gaussian */

/* Data range covered by one equalization cell. */
struct	CELL {
	float	low;	/* Value of the first sorted sample assigned to the cell */
	float	high;	/* Value of the last sorted sample assigned to the cell */
}	*cell;	/* Table of cells, filled in do_usual and searched by get_cell */


/* data holds the grid values; do_usual sets data_min/data_max to the first
 * and last of the sorted non-NaN samples. */
float	*data, data_min, data_max;
float	get_cell();	/* Cell number (as a float) of the cell containing a value */
double	qsnorm();	/* Inverse of the cumulative normal probability function */

/* last_cell: where get_cell starts its search (get_cell stores its latest
 * direct hit here).  n_cells: value given with -C.  n_cells_m1: set to
 * n_cells - 1 in do_usual; the highest cell number get_cell will probe. */
int	last_cell, n_cells = 0, n_cells_m1 = 0;
/* i, j: first and last sample position of the current cell in do_usual.
 * nxy: number of non-NaN nodes counted in do_gaussian. */
int	i, j, nxy;
int	compare_indexed_floats();	/* qsort comparison on INDEXED_DATA.x */
int	compare_indices();	/* qsort comparison on INDEXED_DATA.i */

main (argc, argv)
int argc;
char **argv; {

	int	i;
	int	dump = FALSE, error = FALSE, quadratic = FALSE, gaussian = FALSE;
	char infile[100], outfile[100];
	
	argc = gmt_begin (argc, argv);
	
	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {
			
				/* Common parameters */
				
				case 'V':
				case '\0':
					error += get_common_args (argv[i], 0, 0, 0, 0);
					break;
					
				/* Supplemental parameters */
			
				case 'C':
					n_cells = atoi(&argv[i][2]);
					break;
				case 'D':
					dump = TRUE;
					break;
				case 'G':
					strcpy (outfile, &argv[i][2]);
					break;
				case 'N':
					gaussian = TRUE;
					break;
				case 'Q':
					quadratic = TRUE;
					break;
				default:
					error = TRUE;
					gmt_default_error (argv[i][1]);
					break;
			}
		}
		else
			strcpy (infile, argv[i]);
	}

	if (argc == 1 || gmt_quick) {
		fprintf (stderr,"grdhisteq %s - Histogram equalization for grdfiles\n\n", GMT_VERSION);
		fprintf (stderr, "usage: grdhisteq <infile> -G<outfile> [-C<n_cells> -D -N -Q -V]\n");
		if (gmt_quick) exit (-1);
		fprintf (stderr, "	-C<n_cells> sets how many cells (divisions) of data range to make.\n");
		fprintf (stderr, "	-D dump level information to stdout\n");
		fprintf (stderr, "	-G<outfile> will create an equalized output grdfile.\n");
		fprintf (stderr, "	-N use with -G to make an output grdfile with standard normal scores.\n");
		fprintf (stderr, "	-Q to use quadratic intensity scaling.  [Default is linear]\n");
		explain_option ('V');
		exit (-1);
	}

	if (!infile[0]) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  Must specify input file\n", gmt_program);
		error++;
	}
	if (gaussian && !outfile[0]) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR -N option:  Must also specify output file with -G\n", gmt_program);
		error++;
	}
	if (!gaussian && n_cells <= 0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR -C option:  n_cells must be positive\n", gmt_program);
		error++;
	}
	if (error) exit (-1);

	if (!strcmp (infile, "=")) {
		fprintf (stderr, "grdhisteq: Piping of input grdfile not supported!\n");
		exit (-1);
	}
	
	if (gaussian)
		do_gaussian (infile, outfile);
	else
		do_usual (infile, outfile, n_cells, quadratic, dump);

	gmt_end (argc, argv);
}

/*
 * do_usual: ordinary (non-Gaussian) histogram equalization.
 *
 * Reads the grid in infile, sorts its values and divides the sorted
 * non-NaN samples into n_cells cells.  Cell k ends at sample position
 * floor((k+1) * nxy / n_cells) - 1 (equal sample counts), or, when
 * quadratic is set, at floor(nxy * (1 - sqrt(1 - (k+1)/n_cells)))
 * (the y = 2x - x**2 scaling); the last cell always ends at the last
 * valid sample.
 *
 * If dump_intervals is set, one line per cell (low value, high value,
 * cell number) is printed to stdout.  If outfile is not empty, the grid
 * is read again and written to outfile with every non-NaN node replaced
 * by its cell number (NaN nodes stay NaN).
 *
 * Side effects: sets the globals data, cell, data_min, data_max,
 * last_cell, n_cells_m1, i and j; data and cell are freed on return.
 *
 * Returns 0.
 */
int do_usual (infile, outfile, n_cells, quadratic, dump_intervals)
char *infile, *outfile;
int	n_cells, quadratic, dump_intervals; {

	double	delta_cell, target;
	struct GRD_HEADER header;
	int	nxy, nxy_0, current_cell, dummy[4];
	char format[80];

	dummy[3] = dummy[2] = dummy[1] = dummy[0] = 0;
	
	/* Dump line layout: low and high in gmtdefs.d_format, then the cell number */
	sprintf (format, "%s\t%s\t%%d\n", gmtdefs.d_format, gmtdefs.d_format);
	
	read_grd_info (infile, &header);
	nxy_0 = header.nx * header.ny;
	data = (float *) memory (CNULL, nxy_0, sizeof (float), "grdhisteq");
	read_grd (infile, &header, data, 0.0, 0.0, 0.0, 0.0, dummy, FALSE);

	cell = (struct CELL *) memory (CNULL, n_cells, sizeof(struct CELL), "grdhisteq");

	/* Sort the data and find the division points:  */
	
	qsort ((char *)data, nxy_0, sizeof(float), comp_float_asc);
	nxy = nxy_0;
	/* Only deal with real numbers: trim NaNs off the tail (this relies on
	 * the sort having left them at the end of the array) */
	while (nxy > 0 && bad_float ((double)data[nxy-1])) nxy--;

	if (nxy > 0) {
		data_min = data[0];
		data_max = data[nxy - 1];
	}
	else	/* Nothing to equalize; data[nxy - 1] would lie before the array */
		fprintf (stderr, "grdhisteq: Warning: no valid (non-NaN) values in %s\n", infile);

	last_cell = n_cells/2;
	n_cells_m1 = n_cells - 1;

	current_cell = 0;
	i = 0;
	delta_cell = ((double)nxy) / ((double)n_cells);

	while (nxy > 0 && current_cell < n_cells) {

		if (current_cell == (n_cells - 1) ) {
			j = nxy - 1;
		}
		else if (quadratic) {	/* Use y = 2x - x**2 scaling  */
			
			target = ( (double) (current_cell + 1) ) / ( (double) n_cells);
			j = floor(nxy * (1.0 - sqrt(1.0 - target)));
		}
		else {	/* Use simple linear scale  */

			j = (floor( (current_cell + 1) * delta_cell)) - 1;
		}

		/* With fewer valid samples than cells the positions computed
		 * above can fall outside 0..nxy-1 (j = -1, or i = nxy after the
		 * previous cell took the last sample).  Keep both ends of the
		 * cell on real samples; with nxy >= n_cells nothing changes. */
		if (j < 0) j = 0;
		if (j > nxy - 1) j = nxy - 1;
		if (i > nxy - 1) i = nxy - 1;

		cell[current_cell].low = data[i];
		cell[current_cell].high = data[j];
		
		if (dump_intervals) printf (format, data[i], data[j], current_cell);

		i = j + 1;
		current_cell++;
	}

	if (outfile[0]) {
		/* The sort destroyed the node order, so fetch the grid again */
		read_grd (infile, &header, data, 0.0, 0.0, 0.0, 0.0, dummy, FALSE);

		for (i = 0; i < nxy_0; i++) data[i] = (bad_float ((double)data[i])) ? gmt_NaN : get_cell (data[i]);
		
		write_grd (outfile, &header, data, 0.0, 0.0, 0.0, 0.0, dummy, FALSE);
	}
	
	free ((char *) data);
	free ((char *) cell);

	return (0);
}

/*
 * get_cell: return, as a float, the number of the cell whose [low, high]
 * range contains x.
 *
 * The global cell table (entries 0 .. n_cells_m1) is searched by
 * bisection.  The first probe is last_cell; when the probe itself
 * contains x, last_cell is updated to it.  The two ends of the current
 * search interval are also tested on every pass.
 *
 * Returns 0.0 when no cell contains x (for instance when x is NaN, for
 * which every comparison below is false).  The loop stops as soon as a
 * pass leaves low, high and the probe unchanged, since further passes
 * would repeat the same tests forever.
 */
float	get_cell(x)
float	x;
{
	int		low, high, i;
	int		old_low, old_high, old_i;

	low = 0;
	high = n_cells_m1;
	i = last_cell;

	do {
		if (cell[i].low <= x && cell[i].high >= x) {
			last_cell = i;
			return ( (float)i);
		}
		else if (cell[low].low <= x && cell[low].high >= x) {
			return ( (float)low);
		}
		else if (cell[high].low <= x && cell[high].high >= x) {
			return ( (float)high);
		}

		/* Remember the search state to detect a pass without progress */
		old_low = low;
		old_high = high;
		old_i = i;

		if (cell[i].low > x) {	/* x lies below the probe */
			high = i;
			i = (low + high) / 2;
		}
		else if (cell[i].high < x) {	/* x lies above the probe */
			low = i;
			i = (low + high) / 2;
		}
	} while (low != old_low || high != old_high || i != old_i);

	return ((float)0.0);
}

int do_gaussian (infile, outfile)
char *infile, *outfile; {
	int	i, j, nxy_0, dummy[4];
	double	dnxy;
	struct GRD_HEADER header;
	
	dummy[3] = dummy[2] = dummy[1] = dummy[0] = 0;
	read_grd_info (infile, &header);
	nxy_0 = header.nx * header.ny;
	data = (float *) memory (CNULL, nxy_0, sizeof (float), "grdhisteq");
	read_grd (infile, &header, data, 0.0, 0.0, 0.0, 0.0, dummy, FALSE);
	
	indexed_data = (struct INDEXED_DATA *) memory (CNULL, nxy_0, sizeof (struct INDEXED_DATA), "grdhisteq");

	for (i = j = 0, nxy = nxy_0; i < nxy_0; i++) {
		if (bad_float ((double)data[i])) {	/* Put NaNs in the back */
			nxy--;
			indexed_data[nxy].i = i;
			indexed_data[nxy].x = data[i];
		}
		else {
			indexed_data[j].i = i;
			indexed_data[j].x = data[i];
			j++;
		}
	}
	
	/* Sort on data value  */

	qsort ((char *)indexed_data, nxy, sizeof(struct INDEXED_DATA), compare_indexed_floats);

	dnxy = 1.0 / (nxy + 1);

	for (i = 0; i < nxy; i++) indexed_data[i].x = qsnorm ((double)((i + 1) * dnxy));

	/* Sort on data index  */

	qsort ((char *)indexed_data, nxy_0, sizeof(struct INDEXED_DATA), compare_indices);

	for (i = 0; i < nxy_0; i++) data[i] = indexed_data[i].x;

	write_grd (outfile, &header, data, 0.0, 0.0, 0.0, 0.0, dummy, FALSE);

	free ((char *) indexed_data);
	free ((char *) data);
}

/*
 * qsort comparison on the x (value) member: returns -1, 0 or 1 when the
 * first value is smaller than, equal to, or larger than the second.
 */
int	compare_indexed_floats(point_1, point_2)
struct INDEXED_DATA	*point_1, *point_2;
{
	float	first = point_1->x, second = point_2->x;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return ((first > second) - (first < second));
}

/*
 * qsort comparison on the i (position) member: returns -1, 0 or 1 when
 * the first position is lower than, equal to, or higher than the second.
 */
int	compare_indices(point_1, point_2)

struct INDEXED_DATA	*point_1, *point_2;
{
	int	first = point_1->i, second = point_2->i;

	if (first == second) return (0);
	return ((first < second) ? -1 : 1);
}

/* double qsnorm(p)
 * double	p;
 *
 * Function to invert the cumulative normal probability
 * function.  If z is a standardized normal random deviate,
 * and Q(z) = p is the cumulative Gaussian probability 
 * function, then z = qsnorm(p).
 *
 * Note that 0.0 < p < 1.0.  Data values outside this range
 * will return +/- a large number (1.0e6).
 * To compute p from a sample of data to test for Normalcy,
 * sort the N samples into non-decreasing order, label them
 * i=[1, N], and then compute p = i/(N+1).
 *
 * Author:	Walter H. F. Smith
 * Date:	19 February, 1991-1995.
 *
 * Based on a Fortran subroutine by R. L. Parker.  I had been
 * using IMSL library routine DNORIN(DX) to do what qsnorm(p)
 * does, when I was at the Lamont-Doherty Geological Observatory
 * which had a site license for IMSL.  I now need to invert the
 * gaussian CDF without calling IMSL; hence, this routine.
 *
 */

double	qsnorm(p)
double	p;
{
	double	t, z;
	
	if (p <= 0.0) {
		fprintf(stderr,"qsnorm:  Bad probability.\n");
		return(-1.0e6);
	}
	else if (p >= 1.0) {
		fprintf(stderr,"qsnorm:  Bad probability.\n");
		return(1.0e6);
	}
	else if (p == 0.5) {
		return(0.0);
	}
	else if (p > 0.5) {
		t = sqrt(-2.0 * log(1.0 - p) );
		z = t - (2.515517 +t*(0.802853 +t*0.010328))/
			(1.0 + t*(1.432788 + t*(0.189269+ t*0.001308)));
		return(z);
	}
	else {
		t = sqrt(-2.0 * log(p) );
		z = t - (2.515517 +t*(0.802853 +t*0.010328))/
			(1.0 + t*(1.432788 + t*(0.189269+ t*0.001308)));
		return(-z);
	}
}
