#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "blas_extended.h"
#include "blas_extended_private.h"
#include "blas_extended_test.h"



/*
 * Purpose
 * =======
 *
 * Runs a series of randomized tests on BLAS_swaxpby, which is expected to
 * compute w = alpha * x + beta * y on single precision vectors.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of the vectors being tested.
 *
 * ntests    (input) int
 *           The number of input sets to generate for each norm setting.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is advanced by
 *           the test generator and by the random case selection below.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN ratios).  Since the ratio is supposed
 *           to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run every case twice: the first pass finds the
 *                       maximum ratio, the second pass prints the inputs
 *                       and outputs of failing cases whose ratio exceeds
 *                       half of that maximum.
 *           If debug=2, always print a one-line summary.
 *           If debug=1, print the summary only if there are bad ratios.
 *           If debug=0, print nothing.
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is run only when
 *           xrand(seed) < test_prob; the rest are skipped for speed.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  If no nonzero ratio was
 *           recorded, this is 0.0 when the summary was printed and the
 *           initial sentinel 1e308 otherwise.
 *
 * num_bad_ratio (output) int
 *           The number of cases whose ratio was above the threshold.
 *
 * num_tests (output) int
 *           The number of cases actually run (counted on the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio observed; 0.0 if n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    norm loop   -- varying norm: near underflow, near one, or
 *                -- near overflow
 *      numtest loop  -- how many input sets are generated per norm
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
double do_test_swaxpby(int n,
		       int ntests,
		       int *seed,
		       double thresh,
		       int debug, float test_prob,
		       double *min_ratio, int *num_bad_ratio, int *num_tests)
{
  /* function name */
  const char fname[] = "BLAS_swaxpby";

  /* bound on the number of detailed failure reports; note that the
     `<=' comparison below lets max_print + 1 reports through */
  const int max_print = 32;

  int i;			/* iterates over the repeated tests */
  int j;			/* element counter */
  int ix, iy, iw;		/* indices into x, y, w respectively */
  int incx, incy, incw;		/* strides under test */
  int d_count;			/* counter for the debug passes */
  int find_max_ratio;		/* 1 only if debug = 3 (two passes) */
  int p_count;			/* number of detailed reports printed */
  int tot_tests;		/* total number of cases run */
  int norm;			/* near underflow (-1) / one (0) / overflow (1) */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* largest element ratio of the current case */
  double new_ratio;		/* ratio of a single element */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* internal epsilon, power(2, -BITS_S) */
  double un_int;		/* the internal underflow threshold */
  float alpha;
  float beta;
  float *x;
  float *y;
  float *w;			/* the w computed by BLAS_swaxpby */
  float x_fix1;			/* length-1 "vector" holding 1.0, see below */

  /* x_gen and y_gen hold the contiguous vectors produced by the test
     generator; they are copied into x and y with the stride under test */
  float *x_gen;
  float *y_gen;

  /* the true w calculated by the test generator, in double-double */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* identify the current generated input set */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w hold 2*n elements so that
     strides of magnitude 2 fit */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  find_max_ratio = (debug == 3) ? 1 : 0;
  x_fix1 = 1.0;

  /* precision dependent constants; they do not change between passes */
  eps_int = power(2, -BITS_S);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
  prec = blas_prec_single;

  /* The debug iteration:
     If debug=3, the iteration is executed twice.  The first pass computes
     the max ratio.  The second pass replays the same cases from the saved
     seed and prints info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* only the last pass is reported */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* Generate test inputs one element at a time with the length-1
	   dot product generator.  The first call is made with the alpha
	   and beta flags set to 0, the remaining calls with the flags set
	   to 1 (presumably: generate alpha/beta once, then keep them
	   fixed -- confirm against BLAS_sdot_testgen). */
	BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
			  &alpha, 0, &beta, 0,
			  &x_fix1, &x_gen[0], seed,
			  &y_gen[0], &head_w_true[0], &tail_w_true[0]);

	for (j = 1; j < n; j++) {
	  BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta, 1,
			    &x_fix1, &x_gen[j], seed, &y_gen[j],
			    &head_w_true[j], &tail_w_true[j]);
	}

	count++;

	/* varying incx */
	for (incx = -2; incx <= 2; incx++) {
	  if (incx == 0)
	    continue;

	  /* copy x_gen to x; negative strides start from the far end */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n; j++) {
	    x[ix] = x_gen[j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy = -2; incy <= 2; incy++) {
	    if (incy == 0)
	      continue;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n; j++) {
	      y[iy] = y_gen[j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw = -2; incw <= 2; incw++) {
	      if (incw == 0)
		continue;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_swaxpby to get w */
	      FPU_FIX_STOP;
	      BLAS_swaxpby(n, alpha, x, incx, beta, y, incy, w, incw);
	      FPU_FIX_START;

	      /* computing the ratio: every element of w is checked as a
	         length-1 dot product against its own true value, and the
	         largest element ratio becomes the ratio of the case */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (j = 0; j < n; j++) {
		test_BLAS_sdot(1, blas_no_conj, alpha, beta, y[iy], w[iw],
			       head_w_true[j], tail_w_true[j],
			       &x_fix1, incx, &x[ix], incx, eps_int, un_int,
			       &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;
		ratio = MAX(ratio, new_ratio);
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print only once per
						   generated input set */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  /* one "x; y; w; " line per element */
		  for (j = 0; j < n; j++) {
		    printf("      ");
		    printf("%16.8e", x[ix]);
		    printf("; ");
		    printf("%16.8e", y[iy]);
		    printf("; ");
		    printf("%16.8e", w[iw]);
		    printf("; ");
		    printf("\n");
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      ");
		  printf("alpha = ");
		  printf("%16.8e", alpha);
		  printf("; ");
		  printf("beta = ");
		  printf("%16.8e", beta);
		  printf("\n");
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered on the first pass only, so the
	         replay pass of debug=3 does not count cases twice */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* every case may have been skipped at random, so guard the division */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_swaxpby */

/*
 * Purpose
 * =======
 *
 * Runs a series of randomized tests on BLAS_dwaxpby, which is expected to
 * compute w = alpha * x + beta * y on double precision vectors.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of the vectors being tested.
 *
 * ntests    (input) int
 *           The number of input sets to generate for each norm setting.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is advanced by
 *           the test generator and by the random case selection below.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN ratios).  Since the ratio is supposed
 *           to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run every case twice: the first pass finds the
 *                       maximum ratio, the second pass prints the inputs
 *                       and outputs of failing cases whose ratio exceeds
 *                       half of that maximum.
 *           If debug=2, always print a one-line summary.
 *           If debug=1, print the summary only if there are bad ratios.
 *           If debug=0, print nothing.
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is run only when
 *           xrand(seed) < test_prob; the rest are skipped for speed.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  If no nonzero ratio was
 *           recorded, this is 0.0 when the summary was printed and the
 *           initial sentinel 1e308 otherwise.
 *
 * num_bad_ratio (output) int
 *           The number of cases whose ratio was above the threshold.
 *
 * num_tests (output) int
 *           The number of cases actually run (counted on the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio observed; 0.0 if n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    norm loop   -- varying norm: near underflow, near one, or
 *                -- near overflow
 *      numtest loop  -- how many input sets are generated per norm
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
double do_test_dwaxpby(int n,
		       int ntests,
		       int *seed,
		       double thresh,
		       int debug, float test_prob,
		       double *min_ratio, int *num_bad_ratio, int *num_tests)
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby";

  /* bound on the number of detailed failure reports; note that the
     `<=' comparison below lets max_print + 1 reports through */
  const int max_print = 32;

  int i;			/* iterates over the repeated tests */
  int j;			/* element counter */
  int ix, iy, iw;		/* indices into x, y, w respectively */
  int incx, incy, incw;		/* strides under test */
  int d_count;			/* counter for the debug passes */
  int find_max_ratio;		/* 1 only if debug = 3 (two passes) */
  int p_count;			/* number of detailed reports printed */
  int tot_tests;		/* total number of cases run */
  int norm;			/* near underflow (-1) / one (0) / overflow (1) */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* largest element ratio of the current case */
  double new_ratio;		/* ratio of a single element */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* internal epsilon, power(2, -BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_dwaxpby */
  double x_fix1;		/* length-1 "vector" holding 1.0, see below */

  /* x_gen and y_gen hold the contiguous vectors produced by the test
     generator; they are copied into x and y with the stride under test */
  double *x_gen;
  double *y_gen;

  /* the true w calculated by the test generator, in double-double */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* identify the current generated input set */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w hold 2*n elements so that
     strides of magnitude 2 fit */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  find_max_ratio = (debug == 3) ? 1 : 0;
  x_fix1 = 1.0;

  /* precision dependent constants; they do not change between passes */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;

  /* The debug iteration:
     If debug=3, the iteration is executed twice.  The first pass computes
     the max ratio.  The second pass replays the same cases from the saved
     seed and prints info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* only the last pass is reported */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* Generate test inputs one element at a time with the length-1
	   dot product generator.  The first call is made with the alpha
	   and beta flags set to 0, the remaining calls with the flags set
	   to 1 (presumably: generate alpha/beta once, then keep them
	   fixed -- confirm against BLAS_ddot_testgen). */
	BLAS_ddot_testgen(1, 0, 1, norm, blas_no_conj,
			  &alpha, 0, &beta, 0,
			  &x_fix1, &x_gen[0], seed,
			  &y_gen[0], &head_w_true[0], &tail_w_true[0]);

	for (j = 1; j < n; j++) {
	  BLAS_ddot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta, 1,
			    &x_fix1, &x_gen[j], seed, &y_gen[j],
			    &head_w_true[j], &tail_w_true[j]);
	}

	count++;

	/* varying incx */
	for (incx = -2; incx <= 2; incx++) {
	  if (incx == 0)
	    continue;

	  /* copy x_gen to x; negative strides start from the far end */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n; j++) {
	    x[ix] = x_gen[j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy = -2; incy <= 2; incy++) {
	    if (incy == 0)
	      continue;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n; j++) {
	      y[iy] = y_gen[j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw = -2; incw <= 2; incw++) {
	      if (incw == 0)
		continue;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_dwaxpby to get w */
	      FPU_FIX_STOP;
	      BLAS_dwaxpby(n, alpha, x, incx, beta, y, incy, w, incw);
	      FPU_FIX_START;

	      /* computing the ratio: every element of w is checked as a
	         length-1 dot product against its own true value, and the
	         largest element ratio becomes the ratio of the case */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (j = 0; j < n; j++) {
		test_BLAS_ddot(1, blas_no_conj, alpha, beta, y[iy], w[iw],
			       head_w_true[j], tail_w_true[j],
			       &x_fix1, incx, &x[ix], incx, eps_int, un_int,
			       &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;
		ratio = MAX(ratio, new_ratio);
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print only once per
						   generated input set */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  /* one "x; y; w; " line per element */
		  for (j = 0; j < n; j++) {
		    printf("      ");
		    printf("%24.16e", x[ix]);
		    printf("; ");
		    printf("%24.16e", y[iy]);
		    printf("; ");
		    printf("%24.16e", w[iw]);
		    printf("; ");
		    printf("\n");
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      ");
		  printf("alpha = ");
		  printf("%24.16e", alpha);
		  printf("; ");
		  printf("beta = ");
		  printf("%24.16e", beta);
		  printf("\n");
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered on the first pass only, so the
	         replay pass of debug=3 does not count cases twice */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* every case may have been skipped at random, so guard the division */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_dwaxpby */

double do_test_cwaxpby(int n,
		       int ntests,
		       int *seed,
		       double thresh,
		       int debug, float test_prob,
		       double *min_ratio, int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby (single precision complex
 * waxpby) and reports the error ratios obtained from test_BLAS_cdot.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested.
 *
 * ntests    (input) int
 *           The number of input vectors generated for each value of norm.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is advanced by
 *           the test generator and by the random case selection.
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (this includes a NaN ratio).
 *
 * debug     (input) int
 *           If debug=3, the whole sweep is run twice: the first pass
 *                       finds the maximum ratio, the second pass (with the
 *                       seed restored) prints detailed information for
 *                       failing cases whose ratio > 0.5 * max ratio, at
 *                       most max_print of them.
 *           If debug=2, print a summary line.
 *           If debug=1, print a summary line only if there are bad ratios.
 *           Otherwise,  print nothing.
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  When no nonzero ratio was
 *           recorded this is 0.0 if the summary was printed and the
 *           initial sentinel 1e308 otherwise.
 *
 * num_bad_ratio (output) int
 *           The number of cases whose ratio is not <= thresh.
 *
 * num_tests (output) int
 *           The number of cases actually run (first pass only).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  If n == 0 or ntests == 0 nothing is tested; 0.0 is
 * returned and all outputs are set to zero.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    norm loop   -- varying norm: -1 (near underflow), 0 (near one),
 *                -- 1 (near overflow)
 *      numtest loop  -- ntests freshly generated input vectors
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* name of the routine under test, used in messages */
  const char fname[] = "BLAS_cwaxpby";

  /* max number of detailed failure reports to print */
  const int max_print = 32;

  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* strides in complex elements */
  int incx, incy, incw;		/* strides in floats (2 per element) */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for the debug passes */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* number of detailed reports printed */
  int tot_tests;		/* total number of cases run */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* the largest ratio of the current case */
  double new_ratio;		/* the ratio of a single element */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon passed to the checker */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;			/* the w computed by BLAS_cwaxpby */
  float x_fix1[2];		/* the constant (1, 0) handed to testgen/test */

  /* x_gen and y_gen store the unit-stride vectors produced by testgen;
     they are copied into x and y with the stride under test */
  float *x_gen;
  float *y_gen;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* identify the generated vector last reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* generated vectors are contiguous: one complex element = 2 floats */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = (debug == 3) ? 1 : 0;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;

  /* working precision parameters; they do not change between passes */
  eps_int = power(2, -BITS_S);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
  prec = blas_prec_single;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* each pass counts from zero */

    /* replay exactly the same random sequence on the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* Generate the inputs one element at a time with the length-1 dot
	   product generator.  The first call passes 0 for the alpha/beta
	   flags and the remaining calls pass 1 -- presumably so that alpha
	   and beta are chosen once and then reused; confirm against
	   BLAS_cdot_testgen. */
	BLAS_cdot_testgen(1, 0, 1, norm, blas_no_conj,
			  &alpha, 0, &beta, 0,
			  &x_fix1, &x_gen[0], seed,
			  &y_gen[0], &head_w_true[0], &tail_w_true[0]);

	xgen_val = incx_gen;
	ygen_val = incy_gen;
	for (wgen_val = incw_gen; wgen_val < n * incw_gen;
	     wgen_val += incw_gen) {
	  BLAS_cdot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta, 1,
			    &x_fix1, &x_gen[xgen_val], seed, &y_gen[ygen_val],
			    &head_w_true[wgen_val], &tail_w_true[wgen_val]);
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  /* stride measured in floats */
	  incx = 2 * incx_val;

	  /* copy x_gen to x, starting at the far end for negative strides */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    x[1 + ix] = x_gen[1 + j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    /* stride measured in floats */
	    incy = 2 * incy_val;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      y[1 + iy] = y_gen[1 + j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      /* stride measured in floats */
	      incw = 2 * incw_val;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_cwaxpby to get w */
	      FPU_FIX_STOP;
	      BLAS_cwaxpby(n, alpha, x, incx_val, beta, y, incy_val,
			   w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		test_BLAS_cdot(1, blas_no_conj, alpha, beta, &y[iy], &w[iw],
			       &head_w_true[test_val], &tail_w_true[test_val],
			       &x_fix1, incx, &x[ix], incx, eps_int, un_int,
			       &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;

		/* Keep the largest ratio of the case.  A NaN, once seen,
		   is retained (ratio == ratio is false for NaN) so that an
		   element-wise NaN cannot be overwritten by a later finite
		   ratio and still reaches the threshold test below. */
		if (ratio == ratio && !(new_ratio <= ratio))
		  ratio = new_ratio;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		    (d_count == find_max_ratio) &&
		    (p_count < max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  /* NOTE(review): no newline is emitted between elements,
		     so all n (x; y; w) triples share one output line --
		     confirm whether that is intended. */
		  for (j = 0; j < n; j++) {
		    printf("      ");
		    printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
		    printf("; ");
		    printf("(%16.8e, %16.8e)", y[iy], y[iy + 1]);
		    printf("; ");
		    printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
		    printf("; ");
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      ");
		  printf("alpha = ");
		  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
		  printf("; ");
		  printf("beta = ");
		  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
		  printf("\n");
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered on the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* every case may have been skipped at random: avoid 0/0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_cwaxpby */

double do_test_zwaxpby(int n,
		       int ntests,
		       int *seed,
		       double thresh,
		       int debug, float test_prob,
		       double *min_ratio, int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby (double precision complex
 * waxpby) and reports the error ratios obtained from test_BLAS_zdot.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested.
 *
 * ntests    (input) int
 *           The number of input vectors generated for each value of norm.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is advanced by
 *           the test generator and by the random case selection.
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (this includes a NaN ratio).
 *
 * debug     (input) int
 *           If debug=3, the whole sweep is run twice: the first pass
 *                       finds the maximum ratio, the second pass (with the
 *                       seed restored) prints detailed information for
 *                       failing cases whose ratio > 0.5 * max ratio, at
 *                       most max_print of them.
 *           If debug=2, print a summary line.
 *           If debug=1, print a summary line only if there are bad ratios.
 *           Otherwise,  print nothing.
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  When no nonzero ratio was
 *           recorded this is 0.0 if the summary was printed and the
 *           initial sentinel 1e308 otherwise.
 *
 * num_bad_ratio (output) int
 *           The number of cases whose ratio is not <= thresh.
 *
 * num_tests (output) int
 *           The number of cases actually run (first pass only).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  If n == 0 or ntests == 0 nothing is tested; 0.0 is
 * returned and all outputs are set to zero.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    norm loop   -- varying norm: -1 (near underflow), 0 (near one),
 *                -- 1 (near overflow)
 *      numtest loop  -- ntests freshly generated input vectors
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* name of the routine under test, used in messages */
  const char fname[] = "BLAS_zwaxpby";

  /* max number of detailed failure reports to print */
  const int max_print = 32;

  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* strides in complex elements */
  int incx, incy, incw;		/* strides in doubles (2 per element) */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for the debug passes */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* number of detailed reports printed */
  int tot_tests;		/* total number of cases run */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* the largest ratio of the current case */
  double new_ratio;		/* the ratio of a single element */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon passed to the checker */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_zwaxpby */
  double x_fix1[2];		/* the constant (1, 0) handed to testgen/test */

  /* x_gen and y_gen store the unit-stride vectors produced by testgen;
     they are copied into x and y with the stride under test */
  double *x_gen;
  double *y_gen;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* identify the generated vector last reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* generated vectors are contiguous: one complex element = 2 doubles */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = (debug == 3) ? 1 : 0;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;

  /* working precision parameters; they do not change between passes */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* each pass counts from zero */

    /* replay exactly the same random sequence on the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* Generate the inputs one element at a time with the length-1 dot
	   product generator.  The first call passes 0 for the alpha/beta
	   flags and the remaining calls pass 1 -- presumably so that alpha
	   and beta are chosen once and then reused; confirm against
	   BLAS_zdot_testgen. */
	BLAS_zdot_testgen(1, 0, 1, norm, blas_no_conj,
			  &alpha, 0, &beta, 0,
			  &x_fix1, &x_gen[0], seed,
			  &y_gen[0], &head_w_true[0], &tail_w_true[0]);

	xgen_val = incx_gen;
	ygen_val = incy_gen;
	for (wgen_val = incw_gen; wgen_val < n * incw_gen;
	     wgen_val += incw_gen) {
	  BLAS_zdot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta, 1,
			    &x_fix1, &x_gen[xgen_val], seed, &y_gen[ygen_val],
			    &head_w_true[wgen_val], &tail_w_true[wgen_val]);
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  /* stride measured in doubles */
	  incx = 2 * incx_val;

	  /* copy x_gen to x, starting at the far end for negative strides */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    x[1 + ix] = x_gen[1 + j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    /* stride measured in doubles */
	    incy = 2 * incy_val;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      y[1 + iy] = y_gen[1 + j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      /* stride measured in doubles */
	      incw = 2 * incw_val;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby(n, alpha, x, incx_val, beta, y, incy_val,
			   w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		test_BLAS_zdot(1, blas_no_conj, alpha, beta, &y[iy], &w[iw],
			       &head_w_true[test_val], &tail_w_true[test_val],
			       &x_fix1, incx, &x[ix], incx, eps_int, un_int,
			       &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;

		/* Keep the largest ratio of the case.  A NaN, once seen,
		   is retained (ratio == ratio is false for NaN) so that an
		   element-wise NaN cannot be overwritten by a later finite
		   ratio and still reaches the threshold test below. */
		if (ratio == ratio && !(new_ratio <= ratio))
		  ratio = new_ratio;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		    (d_count == find_max_ratio) &&
		    (p_count < max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  /* NOTE(review): no newline is emitted between elements,
		     so all n (x; y; w) triples share one output line --
		     confirm whether that is intended. */
		  for (j = 0; j < n; j++) {
		    printf("      ");
		    printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
		    printf("; ");
		    printf("(%24.16e, %24.16e)", y[iy], y[iy + 1]);
		    printf("; ");
		    printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
		    printf("; ");
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      ");
		  printf("alpha = ");
		  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		  printf("; ");
		  printf("beta = ");
		  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		  printf("\n");
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered on the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* every case may have been skipped at random: avoid 0/0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby */

double do_test_dwaxpby_d_s(int n,
                           int ntests,
                           int *seed,
                           double thresh,
                           int debug, float test_prob,
                           double *min_ratio,
                           int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dwaxpby_d_s (x is double, y is float,
 * w is double).
 *
 * Every test draws a random value X, scales it by (2^12 - 1), truncates it
 * to an integer and builds (assuming power(b, e) computes b^e):
 *
 *     alpha = X^4 / 2^48                 x_i = X^2 / 2^24
 *     beta  = (X^4 + X^2 + 1) / 2^48     y_i = -(X^2 - 1) / 2^24
 *
 * so that alpha * x_i + beta * y_i = (X^6 - (X^6 - 1)) / 2^72 = 2^-72.
 * All n elements of x and y are set to x_i and y_i, and 2^-72 is stored as
 * the true value of every element of w.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator (xrand).
 *
 * thresh    (input) double
 *           A test whose ratio is not <= thresh (this includes NaN) is
 *           counted as bad.  (Since ratio is supposed to be O(1), we can
 *           set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run all tests twice from the same seed: the first
 *                       pass finds the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       > 0.5 * max ratio
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is run only when a fresh random
 *           draw from xrand(seed) is < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio.  If no non-zero ratio was seen this
 *           is 1e308, unless the summary was printed, in which case it is
 *           reported as 0.0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the threshold
 *               or NaN).
 *
 * num_tests (output) int
 *           The number of tests performed.
 *
 * Return value
 * ============
 *
 * The maximum ratio (0.0 if n == 0 or ntests == 0).
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    norm loop   -- norm = -1, 0, 1; norm is only reported in the debug
 *                -- output, the generated inputs do not depend on it
 *      numtest loop  -- how many times the test is perform with
 *                    -- above set of attributes
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_d_s";

  /* max number of debug lines to print */
  const int max_print = 32;

  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* use to index x, y, w respectively */
  int incx, incy, incw;         /* strides under test */
  int test_val;                 /* element index while computing ratios */
  int d_count;                  /* counter for debug */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* counter for the number of debug lines printed */
  int tot_tests;                /* total number of tests done */
  int norm;                     /* only reported in the debug output */
  int X_int;
  double X;
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum ratio */
  double ratio;                 /* the largest per-element ratio of one case */
  double new_ratio;             /* the ratio of a single element */
  double bad_frac;              /* bad_ratios / tot_tests for the summary */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon, power(2, -BITS_D) */
  double un_int;                /* the internal underflow threshold */
  double x_i;
  float y_i;
  double alpha;
  double beta;
  double w_true;                /* expected value of every element of w */
  double *x;
  float *y;
  double *w;                    /* the w computed  by BLAS_dwaxpby_d_s */
  float x_fix2;                 /* constant 1.0 operand for the dot tester */

  /* x_gen and y_gen store the generated vectors with unit stride.
     they are copied to x and y with the stride under test */
  double *x_gen;
  float *y_gen;

  /* the true w, in double-double */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count identifies the current generated
                                   vectors; old_count is the last one reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w hold 2 * n elements so that
     strides of +-2 stay in bounds */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix2 = 1.0;

  /* these do not change between passes or tests */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
               (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;
  w_true = 1.0 / power(2, 72);

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* only the last pass is reported */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

        /* generate test inputs (see the identity in the header) */
        X = xrand(seed);
        X_int = X * (power(2, 12) - 1);
        X = X_int;

        alpha = X * X * X * X / power(2, 48);
        beta = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);

        x_i = X * X / power(2, 24);
        y_i = -(X * X - 1) / power(2, 24);

        for (j = 0; j < n; j++) {
          x_gen[j] = x_i;
          y_gen[j] = y_i;
          head_w_true[j] = w_true;
          tail_w_true[j] = 0.0;
        }

        count++;

        /* varying incx */
        for (incx = -2; incx <= 2; incx++) {
          if (incx == 0)
            continue;

          /* copy x_gen to x; a negative stride starts from the far end */
          ix = 0;
          if (incx < 0)
            ix = -(n - 1) * incx;
          for (j = 0; j < n; j++) {
            x[ix] = x_gen[j];
            ix += incx;
          }

          /* varying incy */
          for (incy = -2; incy <= 2; incy++) {
            if (incy == 0)
              continue;

            /* copy y_gen to y */
            iy = 0;
            if (incy < 0)
              iy = -(n - 1) * incy;
            for (j = 0; j < n; j++) {
              y[iy] = y_gen[j];
              iy += incy;
            }

            /* varying incw */
            for (incw = -2; incw <= 2; incw++) {
              if (incw == 0)
                continue;

              /* For the sake of speed, we throw out this case at random */
              if (xrand(seed) >= test_prob)
                continue;

              /* call BLAS_dwaxpby_d_s to get w */
              FPU_FIX_STOP;
              BLAS_dwaxpby_d_s(n, alpha, x, incx, beta, y, incy, w, incw);
              FPU_FIX_START;

              /* computing the ratio */
              ix = 0;
              if (incx < 0)
                ix = -(n - 1) * incx;
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;
              iw = 0;
              if (incw < 0)
                iw = -(n - 1) * incw;
              ratio = 0.0;

              /* Each element is checked with the length-1 dot tester.
                 NOTE(review): the argument order is presumably
                 (n, conj, alpha, beta, r_in, r_out, head_true, tail_true,
                 x, incx, y, incy, eps, un, &ratio), so that the float
                 product x_fix2 * y[iy] is scaled by beta and the double
                 x[ix] is the incoming value scaled by alpha -- confirm
                 against the prototype of test_BLAS_ddot_s_s. */
              for (test_val = 0; test_val < n; test_val++) {
                test_BLAS_ddot_s_s(1, blas_no_conj, beta, alpha, x[ix],
                                   w[iw], head_w_true[test_val],
                                   tail_w_true[test_val], &x_fix2, incy,
                                   &y[iy], incy, eps_int, un_int,
                                   &new_ratio);
                ix += incx;
                iy += incy;
                iw += incw;
                ratio = MAX(ratio, new_ratio);
              }

              /* Increase the number of bad ratio, if the ratio
                 is bigger than the threshold.
                 The !<= below causes NaN error to be detected.
                 Note that (NaN > thresh) is always false. */
              if (!(ratio <= thresh)) {
                bad_ratios++;

                /* NOTE(review): p_count starts at 0 and is compared with
                   <=, so up to max_print + 1 reports can be printed. */
                if ((debug == 3) &&     /* print only when debug is on */
                    (count != old_count) &&     /* print if old vector is
                                                   different from the
                                                   current one */
                    (d_count == find_max_ratio) &&
                    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                  old_count = count;

                  printf
                    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                     fname, n, ntests, thresh);
                  /* this is the current state of the seed, after the
                     draws made for this case */
                  printf("seed = %d\n", *seed);
                  printf("norm = %d\n", norm);

                  /* Print test info */
                  switch (prec) {
                  case blas_prec_single:
                    printf("single ");
                    break;
                  case blas_prec_double:
                    printf("double ");
                    break;
                  case blas_prec_indigenous:
                    printf("indigenous ");
                    break;
                  case blas_prec_extra:
                    printf("extra ");
                    break;
                  }
                  switch (norm) {
                  case -1:
                    printf("near_underflow ");
                    break;
                  case 0:
                    printf("near_one ");
                    break;
                  case 1:
                    printf("near_overflow ");
                    break;
                  }

                  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                  ix = 0;
                  iy = 0;
                  iw = 0;
                  if (incx < 0)
                    ix = -(n - 1) * incx;
                  if (incy < 0)
                    iy = -(n - 1) * incy;
                  if (incw < 0)
                    iw = -(n - 1) * incw;

                  for (j = 0; j < n; j++) {
                    printf("      ");
                    printf("%24.16e", x[ix]);
                    printf("; ");
                    printf("%16.8e", y[iy]);
                    printf("; ");
                    printf("%24.16e", w[iw]);
                    printf("; ");
                    ix += incx;
                    iy += incy;
                    iw += incw;
                  }

                  printf("      ");
                  printf("alpha = ");
                  printf("%24.16e", alpha);
                  printf("; ");
                  printf("beta = ");
                  printf("%24.16e", beta);
                  printf("\n");
                  printf("      ratio=%.4e\n", ratio);
                  p_count++;
                }
              }

              /* statistics are gathered in the first pass only, so the
                 second (printing) pass does not count tests twice */
              if (d_count == 0) {

                if (ratio > ratio_max)
                  ratio_max = ratio;

                if (ratio != 0.0 && ratio < ratio_min)
                  ratio_min = ratio;

                tot_tests++;
              }
            }                   /* incw */
          }                     /* incy */
        }                       /* incx */
      }                         /* tests */
    }                           /* norm */

  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* every case may have been skipped by test_prob; do not print 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_dwaxpby_d_s */

double do_test_dwaxpby_s_d(int n,
                           int ntests,
                           int *seed,
                           double thresh,
                           int debug, float test_prob,
                           double *min_ratio,
                           int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dwaxpby_s_d (x is float, y is double,
 * w is double).
 *
 * Every test draws a random value X, scales it by (2^12 - 1), truncates it
 * to an integer and builds (assuming power(b, e) computes b^e):
 *
 *     alpha = X^4 / 2^48                 x_i = X^2 / 2^24
 *     beta  = (X^4 + X^2 + 1) / 2^48     y_i = -(X^2 - 1) / 2^24
 *
 * so that alpha * x_i + beta * y_i = (X^6 - (X^6 - 1)) / 2^72 = 2^-72.
 * All n elements of x and y are set to x_i and y_i, and 2^-72 is stored as
 * the true value of every element of w.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator (xrand).
 *
 * thresh    (input) double
 *           A test whose ratio is not <= thresh (this includes NaN) is
 *           counted as bad.  (Since ratio is supposed to be O(1), we can
 *           set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run all tests twice from the same seed: the first
 *                       pass finds the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       > 0.5 * max ratio
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is run only when a fresh random
 *           draw from xrand(seed) is < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio.  If no non-zero ratio was seen this
 *           is 1e308, unless the summary was printed, in which case it is
 *           reported as 0.0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the threshold
 *               or NaN).
 *
 * num_tests (output) int
 *           The number of tests performed.
 *
 * Return value
 * ============
 *
 * The maximum ratio (0.0 if n == 0 or ntests == 0).
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    norm loop   -- norm = -1, 0, 1; norm is only reported in the debug
 *                -- output, the generated inputs do not depend on it
 *      numtest loop  -- how many times the test is perform with
 *                    -- above set of attributes
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_s_d";

  /* max number of debug lines to print */
  const int max_print = 32;

  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* use to index x, y, w respectively */
  int incx, incy, incw;         /* strides under test */
  int test_val;                 /* element index while computing ratios */
  int d_count;                  /* counter for debug */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* counter for the number of debug lines printed */
  int tot_tests;                /* total number of tests done */
  int norm;                     /* only reported in the debug output */
  int X_int;
  double X;
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum ratio */
  double ratio;                 /* the largest per-element ratio of one case */
  double new_ratio;             /* the ratio of a single element */
  double bad_frac;              /* bad_ratios / tot_tests for the summary */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon, power(2, -BITS_D) */
  double un_int;                /* the internal underflow threshold */
  float x_i;
  double y_i;
  double alpha;
  double beta;
  double w_true;                /* expected value of every element of w */
  float *x;
  double *y;
  double *w;                    /* the w computed  by BLAS_dwaxpby_s_d */
  float x_fix1;                 /* constant 1.0 operand for the dot tester */

  /* x_gen and y_gen store the generated vectors with unit stride.
     they are copied to x and y with the stride under test */
  float *x_gen;
  double *y_gen;

  /* the true w, in double-double */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count identifies the current generated
                                   vectors; old_count is the last one reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w hold 2 * n elements so that
     strides of +-2 stay in bounds */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1 = 1.0;

  /* these do not change between passes or tests */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
               (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;
  w_true = 1.0 / power(2, 72);

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* only the last pass is reported */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

        /* generate test inputs (see the identity in the header) */
        X = xrand(seed);
        X_int = X * (power(2, 12) - 1);
        X = X_int;

        alpha = X * X * X * X / power(2, 48);
        beta = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);

        x_i = X * X / power(2, 24);
        y_i = -(X * X - 1) / power(2, 24);

        for (j = 0; j < n; j++) {
          x_gen[j] = x_i;
          y_gen[j] = y_i;
          head_w_true[j] = w_true;
          tail_w_true[j] = 0.0;
        }

        count++;

        /* varying incx */
        for (incx = -2; incx <= 2; incx++) {
          if (incx == 0)
            continue;

          /* copy x_gen to x; a negative stride starts from the far end */
          ix = 0;
          if (incx < 0)
            ix = -(n - 1) * incx;
          for (j = 0; j < n; j++) {
            x[ix] = x_gen[j];
            ix += incx;
          }

          /* varying incy */
          for (incy = -2; incy <= 2; incy++) {
            if (incy == 0)
              continue;

            /* copy y_gen to y */
            iy = 0;
            if (incy < 0)
              iy = -(n - 1) * incy;
            for (j = 0; j < n; j++) {
              y[iy] = y_gen[j];
              iy += incy;
            }

            /* varying incw */
            for (incw = -2; incw <= 2; incw++) {
              if (incw == 0)
                continue;

              /* For the sake of speed, we throw out this case at random */
              if (xrand(seed) >= test_prob)
                continue;

              /* call BLAS_dwaxpby_s_d to get w */
              FPU_FIX_STOP;
              BLAS_dwaxpby_s_d(n, alpha, x, incx, beta, y, incy, w, incw);
              FPU_FIX_START;

              /* computing the ratio */
              ix = 0;
              if (incx < 0)
                ix = -(n - 1) * incx;
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;
              iw = 0;
              if (incw < 0)
                iw = -(n - 1) * incw;
              ratio = 0.0;

              /* Each element is checked with the length-1 dot tester.
                 NOTE(review): the argument order is presumably
                 (n, conj, alpha, beta, r_in, r_out, head_true, tail_true,
                 x, incx, y, incy, eps, un, &ratio), so that the float
                 product x_fix1 * x[ix] is scaled by alpha and the double
                 y[iy] is the incoming value scaled by beta -- confirm
                 against the prototype of test_BLAS_ddot_s_s. */
              for (test_val = 0; test_val < n; test_val++) {
                test_BLAS_ddot_s_s(1, blas_no_conj, alpha, beta, y[iy],
                                   w[iw], head_w_true[test_val],
                                   tail_w_true[test_val], &x_fix1, incx,
                                   &x[ix], incx, eps_int, un_int,
                                   &new_ratio);
                ix += incx;
                iy += incy;
                iw += incw;
                ratio = MAX(ratio, new_ratio);
              }

              /* Increase the number of bad ratio, if the ratio
                 is bigger than the threshold.
                 The !<= below causes NaN error to be detected.
                 Note that (NaN > thresh) is always false. */
              if (!(ratio <= thresh)) {
                bad_ratios++;

                /* NOTE(review): p_count starts at 0 and is compared with
                   <=, so up to max_print + 1 reports can be printed. */
                if ((debug == 3) &&     /* print only when debug is on */
                    (count != old_count) &&     /* print if old vector is
                                                   different from the
                                                   current one */
                    (d_count == find_max_ratio) &&
                    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                  old_count = count;

                  printf
                    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                     fname, n, ntests, thresh);
                  /* this is the current state of the seed, after the
                     draws made for this case */
                  printf("seed = %d\n", *seed);
                  printf("norm = %d\n", norm);

                  /* Print test info */
                  switch (prec) {
                  case blas_prec_single:
                    printf("single ");
                    break;
                  case blas_prec_double:
                    printf("double ");
                    break;
                  case blas_prec_indigenous:
                    printf("indigenous ");
                    break;
                  case blas_prec_extra:
                    printf("extra ");
                    break;
                  }
                  switch (norm) {
                  case -1:
                    printf("near_underflow ");
                    break;
                  case 0:
                    printf("near_one ");
                    break;
                  case 1:
                    printf("near_overflow ");
                    break;
                  }

                  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                  ix = 0;
                  iy = 0;
                  iw = 0;
                  if (incx < 0)
                    ix = -(n - 1) * incx;
                  if (incy < 0)
                    iy = -(n - 1) * incy;
                  if (incw < 0)
                    iw = -(n - 1) * incw;

                  for (j = 0; j < n; j++) {
                    printf("      ");
                    printf("%16.8e", x[ix]);
                    printf("; ");
                    printf("%24.16e", y[iy]);
                    printf("; ");
                    printf("%24.16e", w[iw]);
                    printf("; ");
                    ix += incx;
                    iy += incy;
                    iw += incw;
                  }

                  printf("      ");
                  printf("alpha = ");
                  printf("%24.16e", alpha);
                  printf("; ");
                  printf("beta = ");
                  printf("%24.16e", beta);
                  printf("\n");
                  printf("      ratio=%.4e\n", ratio);
                  p_count++;
                }
              }

              /* statistics are gathered in the first pass only, so the
                 second (printing) pass does not count tests twice */
              if (d_count == 0) {

                if (ratio > ratio_max)
                  ratio_max = ratio;

                if (ratio != 0.0 && ratio < ratio_min)
                  ratio_min = ratio;

                tot_tests++;
              }
            }                   /* incw */
          }                     /* incy */
        }                       /* incx */
      }                         /* tests */
    }                           /* norm */

  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* every case may have been skipped by test_prob; do not print 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_dwaxpby_s_d */

double do_test_dwaxpby_s_s(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of cancellation tests on BLAS_dwaxpby_s_s
 * (x and y are float vectors; alpha, beta and w are double).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of the vectors being tested.
 *
 * ntests    (input) int
 *           The number of input sets generated for each norm value.
 *
 * seed      (input/output) int
 *           The seed for the random number generator xrand().
 *
 * thresh    (input) double
 *           A test is counted as bad when !(ratio <= thresh), which
 *           also catches NaN ratios.  (Since ratio is supposed to be
 *           O(1), thresh can be set to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run two passes over the same random sequence:
 *                       the first finds the max ratio, the second prints
 *                       the details of failing tests whose ratio is
 *                       larger than 0.5 * max ratio
 *           If debug=2, always print the summary
 *           If debug=1, print the summary only if there are bad ratios
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           An (incx, incy, incw) combination is run only when
 *           xrand(seed) < test_prob.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  When no
 *           such ratio was recorded it keeps the sentinel 1e308, except
 *           that it is reset to 0.0 whenever the summary is printed.
 *
 * num_bad_ratio (output) int
 *               The number of tests of the last pass that failed, i.e.
 *               whose ratio is not <= thresh.
 *
 * num_tests (output) int
 *           The number of tests performed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio of the first pass; 0.0 if n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is three, once otherwise
 *    norm loop   -- norm = -1, 0, 1; here it only labels the reports
 *      numtest loop  -- a new X (hence alpha, beta, x, y) per iteration
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_s_s";

  /* max number of debug lines to print; the guard below is
     p_count <= max_print, so up to max_print + 1 reports can appear */
  const int max_print = 32;

  int i;			/* iterates through the repeated tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* indices into x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* loop vars for the strides */
  int incx, incy, incw;		/* strides counted in array elements */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for the debug passes */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* number of failure reports printed */
  int tot_tests;		/* total number of tests performed */
  int norm;			/* -1, 0 or 1; only used in the reports */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* max over the elements of one test */
  double new_ratio;		/* ratio of a single element */
  double bad_frac;		/* bad_ratios / tot_tests for the summary */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon: 2^(-BITS_D) */
  double un_int;		/* the internal underflow threshold */
  float x_i;
  float y_i;
  double alpha;
  double beta;
  float *x;
  float *y;
  double *w;			/* the w computed by BLAS_dwaxpby_s_s */
  double zero;
  double one;
  double dummy;

  /* x_gen and y_gen hold the generated unit-stride vectors; they are
     copied into x and y with the stride under test */
  float *x_gen;
  float *y_gen;
  double *temp_ab;		/* { alpha, beta } */
  float *temp_xy;		/* { x[ix], y[iy] } */

  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the expected w, as a (head, tail) pair of doubles per element */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count = number of input sets generated */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;

  /* get space for calculation; x, y and w hold 2 * n elements so that
     strides of +-2 fit.  sizeof comes first so that the byte count is
     computed in size_t rather than int. */
  x = (float *) blas_malloc(2 * sizeof(float) * n);
  if (x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(2 * sizeof(float) * n);
  if (y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(2 * sizeof(double) * n);
  if (w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(sizeof(double) * n);
  tail_w_true = (double *) blas_malloc(sizeof(double) * n);
  if (head_w_true == NULL || tail_w_true == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(sizeof(float) * n);
  if (x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(sizeof(float) * n);
  if (y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (double *) blas_malloc(2 * sizeof(double));
  if (temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float));
  if (temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  zero = 0.0;
  one = 1.0;
  dummy = 0.0;

  /* these do not depend on the pass, so compute them once */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* count the bad ratios of this pass only */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* replay the random sequence of pass 0 */

    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* generate test inputs from a random integer X in [0, 2^12 - 1] */
	X = xrand(seed);
	X_int = (int) (X * (power(2, 12) - 1));
	X = X_int;

	/* alpha * x_i = X^6 / 2^72 and
	   beta * y_i = -(X^4 + X^2 + 1) * (X^2 - 1) / 2^72
	   = -(X^6 - 1) / 2^72,
	   so the exact result alpha * x_i + beta * y_i is 2^(-72):
	   nearly everything cancels. */
	alpha = X * X * X * X / power(2, 48);
	beta = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);

	x_i = X * X / power(2, 24);
	y_i = -(X * X - 1) / power(2, 24);

	/* every element gets the same inputs and the same expected w */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i;
	  y_gen[ygen_val] = y_i;
	  head_w_true[wgen_val] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  incx = incx_val;

	  /* copy x_gen to x; a negative stride starts at the far end */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    incy = incy_val;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      incw = incw_val;

	      /* For the sake of speed, we throw out this case at random.
	         This consumes one random number per combination, so it
	         must stay in step between the two debug passes. */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_dwaxpby_s_s to get w */
	      FPU_FIX_STOP;
	      BLAS_dwaxpby_s_s(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      temp_ab[0] = alpha;
	      temp_ab[1] = beta;

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		temp_xy[0] = x[ix];
		temp_xy[1] = y[iy];

		/* each w[iw] is checked as a dot product of length 2,
		   (alpha, beta) . (x[ix], y[iy]) -- presumably scaled by
		   `one' with `zero' times `dummy' added; confirm against
		   test_BLAS_ddot_d_s */
		test_BLAS_ddot_d_s(2, blas_no_conj, one, zero,
				   dummy, w[iw],
				   head_w_true[test_val],
				   tail_w_true[test_val], temp_ab, 1, temp_xy,
				   1, eps_int, un_int, &new_ratio);
		ratio = MAX(ratio, new_ratio);

		ix += incx;
		iy += incy;
		iw += incw;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* at most one report per
						   generated input set */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  default:
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  default:
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  for (j = 0; j < n; j++) {
		    printf("      %16.8e; %16.8e; %24.16e; ",
			   x[ix], y[iy], w[iw]);
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      alpha = %24.16e; beta = %24.16e\n",
			 alpha, beta);
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered in the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* test_prob may have rejected every combination; avoid 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_dwaxpby_s_s */

double do_test_zwaxpby_z_c(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of cancellation tests on BLAS_zwaxpby_z_c
 * (x is stored as pairs of doubles, y as pairs of floats; alpha, beta
 * and w are pairs of doubles).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of the vectors being tested.
 *
 * ntests    (input) int
 *           The number of input sets generated for each norm value.
 *
 * seed      (input/output) int
 *           The seed for the random number generator xrand().
 *
 * thresh    (input) double
 *           A test is counted as bad when !(ratio <= thresh), which
 *           also catches NaN ratios.  (Since ratio is supposed to be
 *           O(1), thresh can be set to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run two passes over the same random sequence:
 *                       the first finds the max ratio, the second prints
 *                       the details of failing tests whose ratio is
 *                       larger than 0.5 * max ratio
 *           If debug=2, always print the summary
 *           If debug=1, print the summary only if there are bad ratios
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           An (incx, incy, incw) combination is run only when
 *           xrand(seed) < test_prob.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  When no
 *           such ratio was recorded it keeps the sentinel 1e308, except
 *           that it is reset to 0.0 whenever the summary is printed.
 *
 * num_bad_ratio (output) int
 *               The number of tests of the last pass that failed, i.e.
 *               whose ratio is not <= thresh.
 *
 * num_tests (output) int
 *           The number of tests performed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio of the first pass; 0.0 if n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is three, once otherwise
 *    norm loop   -- norm = -1, 0, 1; here it only labels the reports
 *      numtest loop  -- a new X (hence alpha, beta, x, y) per iteration
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zwaxpby_z_c";

  /* max number of debug lines to print; the guard below is
     p_count <= max_print, so up to max_print + 1 reports can appear */
  const int max_print = 32;

  int i;			/* iterates through the repeated tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* indices into x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* strides passed to the routine */
  int incx, incy, incw;		/* 2 * inc*_val: strides in real elements */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for the debug passes */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* number of failure reports printed */
  int tot_tests;		/* total number of tests performed */
  int norm;			/* -1, 0 or 1; only used in the reports */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* max over the elements of one test */
  double new_ratio;		/* ratio of a single element */
  double bad_frac;		/* bad_ratios / tot_tests for the summary */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon: 2^(-BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double x_i[2];
  float y_i[2];
  double alpha[2];
  double beta[2];
  double *x;
  float *y;
  double *w;			/* the w computed by BLAS_zwaxpby_z_c */
  float x_fix2[2];		/* the constant (1, 0) in single precision */

  /* x_gen and y_gen hold the generated unit-stride vectors; they are
     copied into x and y with the stride under test */
  double *x_gen;
  float *y_gen;

  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the expected w, as a (head, tail) pair of doubles per component */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count = number of input sets generated */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* every complex element occupies two consecutive real elements */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w hold 2 * n complex elements
     so that strides of +-2 fit.  sizeof comes first so that the byte
     count is computed in size_t rather than int. */
  x = (double *) blas_malloc(2 * sizeof(double) * 2 * n);
  if (x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(2 * sizeof(float) * 2 * n);
  if (y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(2 * sizeof(double) * 2 * n);
  if (w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(2 * sizeof(double) * n);
  tail_w_true = (double *) blas_malloc(2 * sizeof(double) * n);
  if (head_w_true == NULL || tail_w_true == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(2 * sizeof(double) * n);
  if (x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(2 * sizeof(float) * n);
  if (y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix2[0] = 1.0;
  x_fix2[1] = 0.0;

  /* these do not depend on the pass, so compute them once */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* count the bad ratios of this pass only */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* replay the random sequence of pass 0 */

    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* generate test inputs from a random integer X in [0, 2^12 - 1] */
	X = xrand(seed);
	X_int = (int) (X * (power(2, 12) - 1));
	X = X_int;

	/* With a = X^4 / 2^48, b = (X^4 + X^2 + 1) / 2^48,
	   p = X^2 / 2^24 and q = -(X^2 - 1) / 2^24:
	   alpha = (a, a), x = (0, p), beta = (b, b), y = (0, q), so
	   alpha * x + beta * y = (-(a*p + b*q), a*p + b*q) where
	   a*p + b*q = (X^6 - (X^6 - 1)) / 2^72 = 2^(-72):
	   nearly everything cancels. */
	alpha[0] = X * X * X * X / power(2, 48);
	alpha[1] = X * X * X * X / power(2, 48);
	x_i[0] = 0.0;
	x_i[1] = X * X / power(2, 24);

	beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	y_i[0] = 0.0;
	y_i[1] = -(X * X - 1) / power(2, 24);

	/* every element gets the same inputs and the same expected w */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i[0];
	  x_gen[1 + xgen_val] = x_i[1];
	  y_gen[ygen_val] = y_i[0];
	  y_gen[1 + ygen_val] = y_i[1];
	  head_w_true[wgen_val] = -1.0 / power(2, 72);
	  head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  incx = 2 * incx_val;

	  /* copy x_gen to x; a negative stride starts at the far end */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    x[1 + ix] = x_gen[1 + j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    incy = 2 * incy_val;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      y[1 + iy] = y_gen[1 + j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      incw = 2 * incw_val;

	      /* For the sake of speed, we throw out this case at random.
	         This consumes one random number per combination, so it
	         must stay in step between the two debug passes. */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby_z_c to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby_z_c(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		/* each w element is checked as a dot product of length 1,
		   presumably beta * (x_fix2 . y) + alpha * x with
		   x_fix2 = (1, 0) -- confirm the argument roles against
		   test_BLAS_zdot_c_c.  x_fix2 decays to a pointer to its
		   first element, the same address as &x_fix2. */
		test_BLAS_zdot_c_c(1, blas_no_conj, beta, alpha, &x[ix],
				   &w[iw], &head_w_true[test_val],
				   &tail_w_true[test_val], x_fix2, incy,
				   &y[iy], incy, eps_int, un_int, &new_ratio);
		ratio = MAX(ratio, new_ratio);

		ix += incx;
		iy += incy;
		iw += incw;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* at most one report per
						   generated input set */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  default:
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  default:
		    break;
		  }

		  /* note: these are the doubled strides (real elements),
		     not the inc*_val passed to BLAS_zwaxpby_z_c */
		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  for (j = 0; j < n; j++) {
		    printf("      (%24.16e, %24.16e); (%16.8e, %16.8e); "
			   "(%24.16e, %24.16e); ",
			   x[ix], x[ix + 1], y[iy], y[iy + 1],
			   w[iw], w[iw + 1]);
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      alpha = (%24.16e, %24.16e); "
			 "beta = (%24.16e, %24.16e)\n",
			 alpha[0], alpha[1], beta[0], beta[1]);
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered in the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* test_prob may have rejected every combination; avoid 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_z_c */

double do_test_zwaxpby_c_z(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_c_z, invoked here as
 *   BLAS_zwaxpby_c_z(n, alpha, x, incx, beta, y, incy, w, incw)
 * with x held as float (re, im) pairs and alpha, beta, y, w held as
 * double (re, im) pairs.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of input sets generated for each norm value.
 *
 * seed      (input/output) int
 *           The seed handed to xrand().  It is advanced by every draw and,
 *           when debug is 3, restored to its entry value before the
 *           second pass so that pass replays the same sequence.
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh), so a NaN
 *           ratio is counted as bad as well.
 *
 * debug     (input) int
 *           If debug=3, run a second pass that prints details of failing
 *                       cases whose ratio > 0.5 * max ratio
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Any other value prints nothing.
 *
 * test_prob (input) float
 *           A given (incx, incy, incw) case is executed only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded.  If no nonzero ratio was
 *           recorded this stays at the 1e308 sentinel, except that it is
 *           reset to 0.0 when the summary line is printed.
 *
 * num_bad_ratio (output) int
 *               The number of bad cases counted in the last pass.
 *
 * num_tests (output) int
 *           The number of cases executed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass, or 0.0 when n or ntests
 * is zero.  BLAS_error() is invoked when n or ntests is negative.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed once, or twice when debug is 3: the first pass
 *              -- records the max ratio, the second pass prints failing
 *              -- cases whose ratio > 0.5 * max ratio.
 *    norm loop   -- norm: -1, 0, 1.  In this routine norm is only echoed
 *                -- in the debug output; it does not alter the data.
 *      numtest loop  -- ntests generated input sets
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zwaxpby_c_z";

  /* upper bound used when limiting the number of printed failures */
  const int max_print = 32;

  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* increments in complex elements */
  int incx, incy, incw;		/* the same increments in scalars (x 2) */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failures printed */
  int tot_tests;		/* number of cases executed in the first pass */
  int norm;			/* only echoed in the debug output */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* largest per-element ratio of one case */
  double new_ratio;		/* ratio reported for a single element */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon handed to the checker */
  double un_int;		/* the internal underflow threshold */
  float x_i[2];
  double y_i[2];
  double alpha[2];
  double beta[2];
  float *x;
  double *y;
  double *w;			/* the w computed  by BLAS_zwaxpby_c_z */
  float x_fix1[2];		/* the constant 1 + 0i passed to the checker */

  /* x_gen and y_gen hold the generated vectors in packed form; they are
     copied into x and y with the stride under test */
  float *x_gen;
  double *y_gen;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the expected w, as a head/tail pair of doubles per component */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* identify the current generated input set */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* packed complex storage: two scalars per element */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w hold 4 * n scalars so that a
     stride of two complex elements fits */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;

  /* The debug iteration:
     If debug=3, the iteration is executed twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    eps_int = power(2, -BITS_D);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
    prec = blas_prec_double;

    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* generate test inputs from one random integer X.
	   Assuming power(2, k) is 2^k, the values below satisfy
	   alpha * x + beta * y = (-1 + i) / 2^72, which is the value
	   stored in head_w_true. */
	X = xrand(seed);
	X_int = X * (power(2, 12) - 1);
	X = X_int;

	alpha[0] = X * X * X * X / power(2, 48);
	alpha[1] = X * X * X * X / power(2, 48);
	x_i[0] = 0.0;
	x_i[1] = X * X / power(2, 24);

	beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	y_i[0] = 0.0;
	y_i[1] = -(X * X - 1) / power(2, 24);

	/* every element of the generated vectors is the same */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i[0];
	  x_gen[1 + xgen_val] = x_i[1];
	  y_gen[ygen_val] = y_i[0];
	  y_gen[1 + ygen_val] = y_i[1];
	  head_w_true[wgen_val] = -1.0 / power(2, 72);
	  head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  /* stride in scalars */
	  incx = 2 * incx_val;

	  /* copy x_gen to x; a negative stride starts from the far end */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    x[1 + ix] = x_gen[1 + j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    /* stride in scalars */
	    incy = 2 * incy_val;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      y[1 + iy] = y_gen[1 + j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      /* stride in scalars */
	      incw = 2 * incw_val;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby_c_z to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby_c_z(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		/* element-wise check through the length-1 dot-product
		   tester: x_fix1 (= 1) times x[ix], with y[iy] as the
		   input term and w[iw] as the computed result */
		test_BLAS_zdot_c_c(1, blas_no_conj, alpha, beta, &y[iy],
				   &w[iw], &head_w_true[test_val],
				   &tail_w_true[test_val], &x_fix1, incx,
				   &x[ix], incx, eps_int, un_int, &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;

		/* Running maximum, written out by hand so that a NaN
		   reported for any element is kept until the end of the
		   case instead of being replaced by a later finite
		   ratio; the threshold test below relies on that. */
		if (new_ratio > ratio || new_ratio != new_ratio)
		  ratio = new_ratio;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		/* NOTE: a NaN ratio is counted above but fails the last
		   comparison here, so it is not printed in detail. */
		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  default:
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  default:
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  /* one "x; y; w; " triple per element */
		  for (j = 0; j < n; j++) {
		    printf("      (%16.8e, %16.8e); ", x[ix], x[ix + 1]);
		    printf("(%24.16e, %24.16e); ", y[iy], y[iy + 1]);
		    printf("(%24.16e, %24.16e); ", w[iw], w[iw + 1]);
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      alpha = (%24.16e, %24.16e); ",
			 alpha[0], alpha[1]);
		  printf("beta = (%24.16e, %24.16e)\n", beta[0], beta[1]);
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered in the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    /* every case may have been skipped by the test_prob filter, so do
       not divide by a zero test count */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_c_z */

double do_test_zwaxpby_c_c(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_c_c, invoked here as
 *   BLAS_zwaxpby_c_c(n, alpha, x, incx, beta, y, incy, w, incw)
 * with x and y held as float (re, im) pairs and alpha, beta, w held as
 * double (re, im) pairs.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of input sets generated for each norm value.
 *
 * seed      (input/output) int
 *           The seed handed to xrand().  It is advanced by every draw and,
 *           when debug is 3, restored to its entry value before the
 *           second pass so that pass replays the same sequence.
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh), so a NaN
 *           ratio is counted as bad as well.
 *
 * debug     (input) int
 *           If debug=3, run a second pass that prints details of failing
 *                       cases whose ratio > 0.5 * max ratio
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Any other value prints nothing.
 *
 * test_prob (input) float
 *           A given (incx, incy, incw) case is executed only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded.  If no nonzero ratio was
 *           recorded this stays at the 1e308 sentinel, except that it is
 *           reset to 0.0 when the summary line is printed.
 *
 * num_bad_ratio (output) int
 *               The number of bad cases counted in the last pass.
 *
 * num_tests (output) int
 *           The number of cases executed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass, or 0.0 when n or ntests
 * is zero.  BLAS_error() is invoked when n or ntests is negative.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed once, or twice when debug is 3: the first pass
 *              -- records the max ratio, the second pass prints failing
 *              -- cases whose ratio > 0.5 * max ratio.
 *    norm loop   -- norm: -1, 0, 1.  In this routine norm is only echoed
 *                -- in the debug output; it does not alter the data.
 *      numtest loop  -- ntests generated input sets
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zwaxpby_c_c";

  /* upper bound used when limiting the number of printed failures */
  const int max_print = 32;

  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* increments in complex elements */
  int incx, incy, incw;		/* the same increments in scalars (x 2) */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failures printed */
  int tot_tests;		/* number of cases executed in the first pass */
  int norm;			/* only echoed in the debug output */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* largest per-element ratio of one case */
  double new_ratio;		/* ratio reported for a single element */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon handed to the checker */
  double un_int;		/* the internal underflow threshold */
  float x_i[2];
  float y_i[2];
  double alpha[2];
  double beta[2];
  float *x;
  float *y;
  double *w;			/* the w computed  by BLAS_zwaxpby_c_c */
  double zero[2];		/* 0 + 0i */
  double one[2];		/* 1 + 0i */
  double dummy[2];		/* input term multiplied by zero in the check */

  /* x_gen and y_gen hold the generated vectors in packed form; they are
     copied into x and y with the stride under test */
  float *x_gen;
  float *y_gen;

  /* two-element complex scratch vectors for the checker:
     temp_ab = (alpha, beta) and temp_xy = (x[ix], y[iy]) */
  double temp_ab[4];
  float temp_xy[4];

  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the expected w, as a head/tail pair of doubles per component */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* identify the current generated input set */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* packed complex storage: two scalars per element */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w hold 4 * n scalars so that a
     stride of two complex elements fits */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;

  /* The debug iteration:
     If debug=3, the iteration is executed twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    eps_int = power(2, -BITS_D);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
    prec = blas_prec_double;

    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* generate test inputs from one random integer X.
	   Assuming power(2, k) is 2^k, the values below satisfy
	   alpha * x + beta * y = (-1 + i) / 2^72, which is the value
	   stored in head_w_true. */
	X = xrand(seed);
	X_int = X * (power(2, 12) - 1);
	X = X_int;

	alpha[0] = X * X * X * X / power(2, 48);
	alpha[1] = X * X * X * X / power(2, 48);
	x_i[0] = 0.0;
	x_i[1] = X * X / power(2, 24);

	beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	y_i[0] = 0.0;
	y_i[1] = -(X * X - 1) / power(2, 24);

	/* every element of the generated vectors is the same */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i[0];
	  x_gen[1 + xgen_val] = x_i[1];
	  y_gen[ygen_val] = y_i[0];
	  y_gen[1 + ygen_val] = y_i[1];
	  head_w_true[wgen_val] = -1.0 / power(2, 72);
	  head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  /* stride in scalars */
	  incx = 2 * incx_val;

	  /* copy x_gen to x; a negative stride starts from the far end */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    x[1 + ix] = x_gen[1 + j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    /* stride in scalars */
	    incy = 2 * incy_val;

	    /* copy y_gen to y */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      y[1 + iy] = y_gen[1 + j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      /* stride in scalars */
	      incw = 2 * incw_val;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby_c_c to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby_c_c(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      /* first scratch vector: (alpha, beta) */
	      temp_ab[0] = alpha[0];
	      temp_ab[1] = alpha[1];
	      temp_ab[2] = beta[0];
	      temp_ab[3] = beta[1];

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		/* second scratch vector: (x[ix], y[iy]) */
		temp_xy[0] = x[ix];
		temp_xy[1] = x[1 + ix];
		temp_xy[2] = y[iy];
		temp_xy[3] = y[1 + iy];

		/* element-wise check through the length-2 dot-product
		   tester: one * (temp_ab . temp_xy) + zero * dummy,
		   with w[iw] as the computed result */
		test_BLAS_zdot_z_c(2, blas_no_conj, one, zero,
				   dummy, &w[iw],
				   &head_w_true[test_val],
				   &tail_w_true[test_val], &temp_ab[0], 1,
				   &temp_xy[0], 1, eps_int, un_int,
				   &new_ratio);

		/* Running maximum, written out by hand so that a NaN
		   reported for any element is kept until the end of the
		   case instead of being replaced by a later finite
		   ratio; the threshold test below relies on that. */
		if (new_ratio > ratio || new_ratio != new_ratio)
		  ratio = new_ratio;

		ix += incx;
		iy += incy;
		iw += incw;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		/* NOTE: a NaN ratio is counted above but fails the last
		   comparison here, so it is not printed in detail. */
		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  default:
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  default:
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  /* one "x; y; w; " triple per element */
		  for (j = 0; j < n; j++) {
		    printf("      (%16.8e, %16.8e); ", x[ix], x[ix + 1]);
		    printf("(%16.8e, %16.8e); ", y[iy], y[iy + 1]);
		    printf("(%24.16e, %24.16e); ", w[iw], w[iw + 1]);
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      alpha = (%24.16e, %24.16e); ",
			 alpha[0], alpha[1]);
		  printf("beta = (%24.16e, %24.16e)\n", beta[0], beta[1]);
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered in the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    /* every case may have been skipped by the test_prob filter, so do
       not divide by a zero test count */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_c_c */

double do_test_cwaxpby_c_s(int n,
                           int ntests,
                           int *seed,
                           double thresh,
                           int debug, float test_prob,
                           double *min_ratio,
                           int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_c_s.  x and w are stored as
 * interleaved (real, imaginary) float pairs; y is stored as plain floats.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of input sets generated for each norm setting.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is handed to
 *           BLAS_sdot_testgen() and xrand().
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh), so a NaN
 *           ratio is counted as bad too.  (Since ratio is supposed to be
 *           O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass finds the
 *                       max ratio, the second pass (seed restored) prints
 *                       a detailed FAIL report for bad cases whose ratio
 *                       exceeds 0.5 * max ratio.  No summary is printed.
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Otherwise   print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           If none was recorded this is the 1e308 sentinel, except that
 *           the sentinel is replaced by 0.0 whenever the summary line is
 *           printed.  Set to 0.0 when n == 0 or ntests == 0.
 *
 * num_bad_ratio (output) int
 *               The number of cases whose ratio failed the threshold test
 *               (counted during the last pass).
 *
 * num_tests (output) int
 *           The number of cases actually run (counted during the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass; 0.0 when n == 0 or
 * ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    norm loop   -- norm: -1 (near underflow), 0 (near one),
 *                -- 1 (near overflow)
 *      numtest loop  -- ntests generated input sets per norm
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cwaxpby_c_s";

  /* max number of debug lines to print */
  const int max_print = 32;

  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* storage indices into x, y, w */
  int incx_val, incy_val, incw_val;     /* logical increments under test */
  int incx, incy, incw;         /* storage strides (in floats) */
  int test_val;                 /* index into head_w_true / tail_w_true */
  int d_count;                  /* counter for the debug passes */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of FAIL reports printed so far */
  int tot_tests;                /* number of cases run in the first pass */
  int norm;                     /* input values near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum nonzero ratio */
  double ratio;                 /* largest per-element ratio of one case */
  double new_ratio;             /* ratio of the element just checked */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected */
  double un_int;                /* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;                     /* the w computed by BLAS_cwaxpby_c_s */
  float x_fix2;                 /* constant 1.0 operand for the checker */

  /* x_gen and y_gen hold the vectors built from the generator output with
     unit increment; they are copied into x and y for every increment */
  float *x_gen;
  float *y_gen;

  int incy_gen, incx_gen, incw_gen;     /* strides of the *_gen / *_true arrays */
  int xgen_val, ygen_val, wgen_val;
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the reference w, stored as head/tail pairs of doubles */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* identify the input set last reported */

  FPU_FIX_DECL;

  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* x and w elements occupy two floats each, y elements one float */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 1;

  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix2 = 1.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* only the last pass is reported */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    eps_int = power(2, -BITS_S);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
                 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
    prec = blas_prec_single;

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

        /* Generate test inputs.  The generator works on real scalars; the
           complex operands are assembled from them as
           x = (0, xtemp), alpha = (atemp, atemp),
           y = ytemp,      beta  = (-btemp, btemp),
           and the reference is stored as (-wltemp, wltemp).
           NOTE(review): this is consistent if the generator returns
           wltemp = atemp * xtemp + btemp * ytemp -- confirm against
           BLAS_sdot_testgen.  wttemp is not stored; the tail is zero. */
        x_fix1_temp = 1.0;
        BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
                          &atemp, 0, &btemp, 0,
                          &x_fix1_temp, &xtemp, seed,
                          &ytemp, &wltemp, &wttemp);
        x_gen[0] = 0.0;
        x_gen[1] = xtemp;
        alpha[0] = atemp;
        alpha[1] = atemp;

        y_gen[0] = ytemp;
        beta[0] = -btemp;
        beta[1] = btemp;

        head_w_true[0] = -wltemp;
        head_w_true[1] = wltemp;
        tail_w_true[0] = 0.0;
        tail_w_true[1] = 0.0;

        /* Remaining elements: the alpha/beta flags are 1 here instead of 0
           (presumably so the generator keeps the alpha and beta chosen
           above -- confirm against BLAS_sdot_testgen). */
        xgen_val = incx_gen;
        ygen_val = incy_gen;
        for (wgen_val = incw_gen; wgen_val < n * incw_gen;
             wgen_val += incw_gen) {
          BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &atemp, 1, &btemp, 1,
                            &x_fix1_temp, &xtemp, seed, &ytemp, &wltemp,
                            &wttemp);

          x_gen[xgen_val] = 0.0;
          x_gen[xgen_val + 1] = xtemp;

          y_gen[ygen_val] = ytemp;

          head_w_true[wgen_val] = -wltemp;
          head_w_true[wgen_val + 1] = wltemp;
          tail_w_true[wgen_val] = 0.0;
          tail_w_true[wgen_val + 1] = 0.0;
          xgen_val += incx_gen;
          ygen_val += incy_gen;
        }

        count++;

        /* varying incx */
        for (incx_val = -2; incx_val <= 2; incx_val++) {
          if (incx_val == 0)
            continue;

          /* storage stride of x: two floats per element */
          incx = incx_val * 2;

          /* set x starting index */
          ix = 0;
          if (incx < 0)
            ix = -(n - 1) * incx;

          /* copy x_gen to x */
          for (j = 0; j < n * incx_gen; j += incx_gen) {
            x[ix] = x_gen[j];
            x[ix + 1] = x_gen[j + 1];
            ix += incx;
          }

          /* varying incy */
          for (incy_val = -2; incy_val <= 2; incy_val++) {
            if (incy_val == 0)
              continue;

            /* storage stride of y: one float per element */
            incy = incy_val;

            /* set y starting index */
            iy = 0;
            if (incy < 0)
              iy = -(n - 1) * incy;

            /* copy y_gen to y */
            for (j = 0; j < n * incy_gen; j += incy_gen) {
              y[iy] = y_gen[j];
              iy += incy;
            }

            /* varying incw */
            for (incw_val = -2; incw_val <= 2; incw_val++) {
              if (incw_val == 0)
                continue;

              /* storage stride of w: two floats per element */
              incw = incw_val * 2;

              /* For the sake of speed, we throw out this case at random */
              if (xrand(seed) >= test_prob)
                continue;

              /* call BLAS_cwaxpby_c_s to get w */
              FPU_FIX_STOP;
              BLAS_cwaxpby_c_s(n, alpha, x, incx_val, beta, y, incy_val,
                               w, incw_val);
              FPU_FIX_START;

              /* computing the ratio */
              ix = 0;
              if (incx < 0)
                ix = -(n - 1) * incx;
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;
              iw = 0;
              if (incw < 0)
                iw = -(n - 1) * incw;
              ratio = 0.0;

              /* Every element is checked on its own with the length-1 dot
                 checker; the case ratio is the largest element ratio.
                 NOTE(review): if MAX(a, b) is the usual
                 ((a) > (b) ? (a) : (b)), a NaN new_ratio on a non-final
                 element is replaced by the next element's ratio and goes
                 unnoticed -- confirm against the MAX definition. */
              for (test_val = 0; test_val < n * incw_gen;
                   test_val += incw_gen) {
                test_BLAS_cdot_s_s(1, blas_no_conj, beta, alpha, &x[ix],
                                   &w[iw], &head_w_true[test_val],
                                   &tail_w_true[test_val], &x_fix2, incy,
                                   &y[iy], incy, eps_int, un_int, &new_ratio);
                ix += incx;
                iy += incy;
                iw += incw;
                ratio = MAX(ratio, new_ratio);
              }

              /* Increase the number of bad ratio, if the ratio
                 is bigger than the threshold.
                 The !<= below causes NaN error to be detected.
                 Note that (NaN > thresh) is always false. */
              if (!(ratio <= thresh)) {
                bad_ratios++;

                /* NOTE(review): p_count <= max_print lets max_print + 1
                   reports through. */
                if ((debug == 3) &&     /* print only when debug is on */
                    (count != old_count) &&     /* print if old vector is
                                                   different from the
                                                   current one */
                    (d_count == find_max_ratio) &&
                    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                  old_count = count;

                  printf
                    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                     fname, n, ntests, thresh);
                  printf("seed = %d\n", *seed);
                  printf("norm = %d\n", norm);

                  /* Print test info */
                  switch (prec) {
                  case blas_prec_single:
                    printf("single ");
                    break;
                  case blas_prec_double:
                    printf("double ");
                    break;
                  case blas_prec_indigenous:
                    printf("indigenous ");
                    break;
                  case blas_prec_extra:
                    printf("extra ");
                    break;
                  }
                  switch (norm) {
                  case -1:
                    printf("near_underflow ");
                    break;
                  case 0:
                    printf("near_one ");
                    break;
                  case 1:
                    printf("near_overflow ");
                    break;
                  }

                  /* these are the storage strides, not the *_val arguments */
                  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                  ix = 0;
                  iy = 0;
                  iw = 0;
                  if (incx < 0)
                    ix = -(n - 1) * incx;
                  if (incy < 0)
                    iy = -(n - 1) * incy;
                  if (incw < 0)
                    iw = -(n - 1) * incw;

                  for (j = 0; j < n; j++) {
                    printf("      ");
                    printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
                    printf("; ");
                    printf("%16.8e", y[iy]);
                    printf("; ");
                    printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
                    printf("; ");
                    ix += incx;
                    iy += incy;
                    iw += incw;
                  }

                  printf("      ");
                  printf("alpha = ");
                  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
                  printf("; ");
                  printf("beta = ");
                  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
                  printf("\n");
                  printf("      ratio=%.4e\n", ratio);
                  p_count++;
                }
              }

              /* statistics are gathered during the first pass only */
              if (d_count == 0) {

                if (ratio > ratio_max)
                  ratio_max = ratio;

                if (ratio != 0.0 && ratio < ratio_min)
                  ratio_min = ratio;

                tot_tests++;
              }
            }                   /* incw */
          }                     /* incy */
        }                       /* incx */
      }                         /* tests */
    }                           /* norm */

  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* every case may have been skipped by test_prob; avoid printing 0/0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}                               /* end of do_test_cwaxpby_c_s */

double do_test_cwaxpby_s_c(int n,
                           int ntests,
                           int *seed,
                           double thresh,
                           int debug, float test_prob,
                           double *min_ratio,
                           int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_s_c.  x is stored as plain
 * floats; y and w are stored as interleaved (real, imaginary) float pairs.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of input sets generated for each norm setting.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is handed to
 *           BLAS_sdot_testgen() and xrand().
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh), so a NaN
 *           ratio is counted as bad too.  (Since ratio is supposed to be
 *           O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass finds the
 *                       max ratio, the second pass (seed restored) prints
 *                       a detailed FAIL report for bad cases whose ratio
 *                       exceeds 0.5 * max ratio.  No summary is printed.
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Otherwise   print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           If none was recorded this is the 1e308 sentinel, except that
 *           the sentinel is replaced by 0.0 whenever the summary line is
 *           printed.  Set to 0.0 when n == 0 or ntests == 0.
 *
 * num_bad_ratio (output) int
 *               The number of cases whose ratio failed the threshold test
 *               (counted during the last pass).
 *
 * num_tests (output) int
 *           The number of cases actually run (counted during the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass; 0.0 when n == 0 or
 * ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    norm loop   -- norm: -1 (near underflow), 0 (near one),
 *                -- 1 (near overflow)
 *      numtest loop  -- ntests generated input sets per norm
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cwaxpby_s_c";

  /* max number of debug lines to print */
  const int max_print = 32;

  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* storage indices into x, y, w */
  int incx_val, incy_val, incw_val;     /* logical increments under test */
  int incx, incy, incw;         /* storage strides (in floats) */
  int test_val;                 /* index into head_w_true / tail_w_true */
  int d_count;                  /* counter for the debug passes */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of FAIL reports printed so far */
  int tot_tests;                /* number of cases run in the first pass */
  int norm;                     /* input values near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum nonzero ratio */
  double ratio;                 /* largest per-element ratio of one case */
  double new_ratio;             /* ratio of the element just checked */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected */
  double un_int;                /* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;                     /* the w computed by BLAS_cwaxpby_s_c */
  float x_fix1;                 /* constant 1.0 operand for the checker */

  /* x_gen and y_gen hold the vectors built from the generator output with
     unit increment; they are copied into x and y for every increment */
  float *x_gen;
  float *y_gen;

  int incy_gen, incx_gen, incw_gen;     /* strides of the *_gen / *_true arrays */
  int xgen_val, ygen_val, wgen_val;
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the reference w, stored as head/tail pairs of doubles */
  double *head_w_true, *tail_w_true;

  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* identify the input set last reported */

  FPU_FIX_DECL;

  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* y and w elements occupy two floats each, x elements one float */
  incw_gen = 2;
  incx_gen = 1;
  incy_gen = 2;

  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1 = 1.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* only the last pass is reported */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    eps_int = power(2, -BITS_S);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
                 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
    prec = blas_prec_single;

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

        /* Generate test inputs.  The generator works on real scalars; the
           complex operands are assembled from them as
           x = xtemp,      alpha = (-atemp, atemp),
           y = (0, ytemp), beta  = (btemp, btemp),
           and the reference is stored as (-wltemp, wltemp).
           NOTE(review): this is consistent if the generator returns
           wltemp = atemp * xtemp + btemp * ytemp -- confirm against
           BLAS_sdot_testgen.  wttemp is not stored; the tail is zero. */
        x_fix1_temp = 1.0;
        BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
                          &atemp, 0, &btemp, 0,
                          &x_fix1_temp, &xtemp, seed,
                          &ytemp, &wltemp, &wttemp);
        x_gen[0] = xtemp;
        alpha[0] = -atemp;
        alpha[1] = atemp;

        y_gen[0] = 0.0;
        y_gen[1] = ytemp;
        beta[0] = btemp;
        beta[1] = btemp;

        head_w_true[0] = -wltemp;
        head_w_true[1] = wltemp;
        tail_w_true[0] = 0.0;
        tail_w_true[1] = 0.0;

        /* Remaining elements: the alpha/beta flags are 1 here instead of 0
           (presumably so the generator keeps the alpha and beta chosen
           above -- confirm against BLAS_sdot_testgen). */
        xgen_val = incx_gen;
        ygen_val = incy_gen;
        for (wgen_val = incw_gen; wgen_val < n * incw_gen;
             wgen_val += incw_gen) {
          BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &atemp, 1, &btemp, 1,
                            &x_fix1_temp, &xtemp, seed, &ytemp, &wltemp,
                            &wttemp);

          x_gen[xgen_val] = xtemp;

          y_gen[ygen_val] = 0.0;
          y_gen[ygen_val + 1] = ytemp;

          head_w_true[wgen_val] = -wltemp;
          head_w_true[wgen_val + 1] = wltemp;
          tail_w_true[wgen_val] = 0.0;
          tail_w_true[wgen_val + 1] = 0.0;
          xgen_val += incx_gen;
          ygen_val += incy_gen;
        }

        count++;

        /* varying incx */
        for (incx_val = -2; incx_val <= 2; incx_val++) {
          if (incx_val == 0)
            continue;

          /* storage stride of x: one float per element */
          incx = incx_val;

          /* set x starting index */
          ix = 0;
          if (incx < 0)
            ix = -(n - 1) * incx;

          /* copy x_gen to x */
          for (j = 0; j < n * incx_gen; j += incx_gen) {
            x[ix] = x_gen[j];
            ix += incx;
          }

          /* varying incy */
          for (incy_val = -2; incy_val <= 2; incy_val++) {
            if (incy_val == 0)
              continue;

            /* storage stride of y: two floats per element */
            incy = incy_val * 2;

            /* set y starting index */
            iy = 0;
            if (incy < 0)
              iy = -(n - 1) * incy;

            /* copy y_gen to y */
            for (j = 0; j < n * incy_gen; j += incy_gen) {
              y[iy] = y_gen[j];
              y[iy + 1] = y_gen[j + 1];
              iy += incy;
            }

            /* varying incw */
            for (incw_val = -2; incw_val <= 2; incw_val++) {
              if (incw_val == 0)
                continue;

              /* storage stride of w: two floats per element */
              incw = incw_val * 2;

              /* For the sake of speed, we throw out this case at random */
              if (xrand(seed) >= test_prob)
                continue;

              /* call BLAS_cwaxpby_s_c to get w */
              FPU_FIX_STOP;
              BLAS_cwaxpby_s_c(n, alpha, x, incx_val, beta, y, incy_val,
                               w, incw_val);
              FPU_FIX_START;

              /* computing the ratio */
              ix = 0;
              if (incx < 0)
                ix = -(n - 1) * incx;
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;
              iw = 0;
              if (incw < 0)
                iw = -(n - 1) * incw;
              ratio = 0.0;

              /* Every element is checked on its own with the length-1 dot
                 checker; the case ratio is the largest element ratio.
                 NOTE(review): if MAX(a, b) is the usual
                 ((a) > (b) ? (a) : (b)), a NaN new_ratio on a non-final
                 element is replaced by the next element's ratio and goes
                 unnoticed -- confirm against the MAX definition. */
              for (test_val = 0; test_val < n * incw_gen;
                   test_val += incw_gen) {
                test_BLAS_cdot_s_s(1, blas_no_conj, alpha, beta, &y[iy],
                                   &w[iw], &head_w_true[test_val],
                                   &tail_w_true[test_val], &x_fix1, incx,
                                   &x[ix], incx, eps_int, un_int, &new_ratio);
                ix += incx;
                iy += incy;
                iw += incw;
                ratio = MAX(ratio, new_ratio);
              }

              /* Increase the number of bad ratio, if the ratio
                 is bigger than the threshold.
                 The !<= below causes NaN error to be detected.
                 Note that (NaN > thresh) is always false. */
              if (!(ratio <= thresh)) {
                bad_ratios++;

                /* NOTE(review): p_count <= max_print lets max_print + 1
                   reports through. */
                if ((debug == 3) &&     /* print only when debug is on */
                    (count != old_count) &&     /* print if old vector is
                                                   different from the
                                                   current one */
                    (d_count == find_max_ratio) &&
                    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                  old_count = count;

                  printf
                    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                     fname, n, ntests, thresh);
                  printf("seed = %d\n", *seed);
                  printf("norm = %d\n", norm);

                  /* Print test info */
                  switch (prec) {
                  case blas_prec_single:
                    printf("single ");
                    break;
                  case blas_prec_double:
                    printf("double ");
                    break;
                  case blas_prec_indigenous:
                    printf("indigenous ");
                    break;
                  case blas_prec_extra:
                    printf("extra ");
                    break;
                  }
                  switch (norm) {
                  case -1:
                    printf("near_underflow ");
                    break;
                  case 0:
                    printf("near_one ");
                    break;
                  case 1:
                    printf("near_overflow ");
                    break;
                  }

                  /* these are the storage strides, not the *_val arguments */
                  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                  ix = 0;
                  iy = 0;
                  iw = 0;
                  if (incx < 0)
                    ix = -(n - 1) * incx;
                  if (incy < 0)
                    iy = -(n - 1) * incy;
                  if (incw < 0)
                    iw = -(n - 1) * incw;

                  for (j = 0; j < n; j++) {
                    printf("      ");
                    printf("%16.8e", x[ix]);
                    printf("; ");
                    printf("(%16.8e, %16.8e)", y[iy], y[iy + 1]);
                    printf("; ");
                    printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
                    printf("; ");
                    ix += incx;
                    iy += incy;
                    iw += incw;
                  }

                  printf("      ");
                  printf("alpha = ");
                  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
                  printf("; ");
                  printf("beta = ");
                  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
                  printf("\n");
                  printf("      ratio=%.4e\n", ratio);
                  p_count++;
                }
              }

              /* statistics are gathered during the first pass only */
              if (d_count == 0) {

                if (ratio > ratio_max)
                  ratio_max = ratio;

                if (ratio != 0.0 && ratio < ratio_min)
                  ratio_min = ratio;

                tot_tests++;
              }
            }                   /* incw */
          }                     /* incy */
        }                       /* incx */
      }                         /* tests */
    }                           /* norm */

  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* every case may have been skipped by test_prob; avoid printing 0/0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}                               /* end of do_test_cwaxpby_s_c */

double do_test_cwaxpby_s_s(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_s_s (waxpby with real single
 * x and y, and complex single alpha, beta and w, as the buffers used
 * below show).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator used in testgen()
 *           and for the random skipping of cases.
 *
 * thresh    (input) double
 *           A case whose ratio is not <= thresh (this includes a NaN
 *           ratio) is counted as bad.  (Since ratio is supposed to be
 *           O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run everything twice: the first pass finds the
 *                       max ratio, the second pass prints the inputs,
 *                       w and ratio of failing cases whose ratio
 *                       exceeds 0.5 * max ratio
 *           If debug=2, print a one-line summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           Otherwise,  print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  The initial value 1e308
 *           is converted to 0.0 only when the summary is printed.
 * 
 * num_bad_ratio (output) int
 *               The number of tests fail; they are above the threshold.
 *
 * num_tests (output) int
 *           The number of tests actually performed (skipped cases are
 *           not counted).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  Returns 0.0 (with all outputs zeroed) when
 * n == 0 or ntests == 0.  Negative n or ntests is reported through
 * BLAS_error.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- executed twice if debug is three (see above),
 *              -- otherwise executed once
 *    norm loop   -- varying norm: near underflow, near one, or 
 *                  -- near overflow
 *      numtest loop  -- how many times the test is performed with 
 *                      -- above set of attributes
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cwaxpby_s_s";

  /* max number of failure reports to print (the test below uses <=,
     so max_print + 1 reports can get through) */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val,	/* for testing different inc values */
    incx, incy, incw, gen_val, test_val;
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of tests actually run */
  int norm;			/* input values of near underflow/one/overflow */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double ratio;			/* the per-use test ratio from test() */
  double new_ratio;
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  float x_i;
  float y_i;
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;			/* the w computed  by BLAS_cwaxpby_s_s */
  float x_fix1;
  float x_fix2;
  float zero[2];
  float one[2];
  float dummy[2];

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *x_gen;
  float *y_gen;
  float *temp_ab;		/* (alpha, beta) packed as two complex values */
  float *temp_xy;		/* (x_j, y_j) packed as two real values */


  /* added by DY */
  float x_genj;
  float y_genj;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;
  int iymax, ixmax;
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;


  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count numbers the generated input sets;
				   old_count is the set last reported, so a
				   set is printed at most once */

  FPU_FIX_DECL;

  /* These are here to get rid of compiler warnings.
     Should modify M4 code to not even produce these variables when not
     needed. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;
  x_i = 0.0;
  y_i = 0.0;
  X = 0.0;
  X_int = 0;
  gen_val = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* strides of the generated (unit increment) data: x_gen and y_gen
     are real, w_true is complex and so takes two doubles per element */
  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;
  incw_gen *= 2;



  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (2 > 0 && temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float));
  if (2 > 0 && temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1 = 1.0;
  x_fix2 = 1.0;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* replay exactly the same random sequence on the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */


    eps_int = power(2, -BITS_S);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
    prec = blas_prec_single;

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* generate test inputs: the first testgen call is made with
	   flags 0, 0 for atemp and btemp, the later calls with flags
	   1, 1 (presumably "generate" vs. "keep fixed" -- confirm
	   against BLAS_sdot_testgen).  The real scalars are turned into
	   the complex values (-atemp, atemp) and (-btemp, btemp), and
	   the real result wltemp into (-wltemp, wltemp) accordingly. */
	x_fix1_temp = 1.0;
	BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
			  &atemp, 0, &btemp, 0,
			  &x_fix1_temp, &xtemp, seed,
			  &ytemp, &wltemp, &wttemp);
	x_gen[0] = xtemp;
	alpha[0] = -atemp;
	alpha[1] = atemp;

	y_gen[0] = ytemp;
	beta[0] = -btemp;
	beta[1] = btemp;

	head_w_true[0] = -wltemp;
	head_w_true[1] = wltemp;
	tail_w_true[0] = 0.0;
	tail_w_true[1] = 0.0;

	/* remaining elements 1 .. n-1 */
	xgen_val = incx_gen;
	ygen_val = incy_gen;
	for (wgen_val = incw_gen; wgen_val < n * incw_gen;
	     wgen_val += incw_gen) {
	  BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &atemp, 1, &btemp, 1,
			    &x_fix1_temp, &xtemp, seed, &ytemp, &wltemp,
			    &wttemp);

	  x_gen[xgen_val] = xtemp;

	  y_gen[ygen_val] = ytemp;


	  head_w_true[wgen_val] = -wltemp;
	  head_w_true[wgen_val + 1] = wltemp;
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;


	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  /* setting incx */
	  incx = incx_val;


	  /* set x starting index */
	  ix = 0;
	  if (incx < 0)
	    ix = -(n - 1) * incx;

	  /* copy x_gen to x */
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x_genj = x_gen[j];
	    x[ix] = x_genj;
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    /* setting incy */
	    incy = incy_val;


	    /* set y starting index */
	    iy = 0;
	    if (incy < 0)
	      iy = -(n - 1) * incy;

	    /* copy y_gen to y */
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y_genj = y_gen[j];
	      y[iy] = y_genj;
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      /* setting incw; doubled because w is complex and is
	         indexed here as an array of floats */
	      incw = incw_val;
	      incw *= 2;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_cwaxpby_s_s to get w */
	      FPU_FIX_STOP;
	      BLAS_cwaxpby_s_s(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = 0;
	      if (incx < 0)
		ix = -(n - 1) * incx;
	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;
	      iw = 0;
	      if (incw < 0)
		iw = -(n - 1) * incw;
	      ratio = 0.0;

	      /* Each w_j is checked as the length-2 dot product of
	         (alpha, beta) with (x_j, y_j). */
	      temp_ab[0] = alpha[0];
	      temp_ab[0 + 1] = alpha[1];
	      temp_ab[incw_gen] = beta[0];
	      temp_ab[incw_gen + 1] = beta[1];

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		x_genj = x[ix];
		temp_xy[0] = x_genj;

		y_genj = y[iy];
		temp_xy[incy_gen] = y_genj;

		test_BLAS_cdot_c_s(2, blas_no_conj, one, zero,
				   dummy, &w[iw],
				   &head_w_true[test_val],
				   &tail_w_true[test_val], &temp_ab[0], 1,
				   &temp_xy[0], 1, eps_int, un_int,
				   &new_ratio);
		if (MAX(ratio, new_ratio) == new_ratio) {
		  iymax = iy;
		  ixmax = ix;
		}
		/* the case ratio is the worst element ratio */
		ratio = MAX(ratio, new_ratio);

		ix += incx;
		iy += incy;
		iw += incw;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = 0;
		  iy = 0;
		  iw = 0;
		  if (incx < 0)
		    ix = -(n - 1) * incx;
		  if (incy < 0)
		    iy = -(n - 1) * incy;
		  if (incw < 0)
		    iw = -(n - 1) * incw;

		  /* one "x; y; w; " triple per element */
		  for (j = 0; j < n; j++) {
		    printf("      ");
		    printf("%16.8e", x[ix]);
		    printf("; ");
		    printf("%16.8e", y[iy]);
		    printf("; ");
		    printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
		    printf("; ");
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      ");
		  printf("alpha = ");
		  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
		  printf("; ");
		  printf("beta = ");
		  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
		  printf("\n");
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }
	      /* statistics are gathered on the first pass only, so the
	         debug=3 replay does not count cases twice */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    /* Every case may have been skipped at random, leaving
       tot_tests == 0; report 0.00 instead of computing 0/0. */
    double bad_fraction = 0.0;
    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_cwaxpby_s_s */

double do_test_zwaxpby_z_d(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_z_d (waxpby with complex
 * double x, alpha, beta and w, and real double y, as the buffers used
 * below show).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator used to build the
 *           test inputs and for the random skipping of cases.
 *
 * thresh    (input) double
 *           A case whose ratio is not <= thresh (this includes a NaN
 *           ratio) is counted as bad.  (Since ratio is supposed to be
 *           O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run everything twice: the first pass finds the
 *                       max ratio, the second pass prints the inputs,
 *                       w and ratio of failing cases whose ratio
 *                       exceeds 0.5 * max ratio
 *           If debug=2, print a one-line summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           Otherwise,  print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  The initial value 1e308
 *           is converted to 0.0 only when the summary is printed.
 * 
 * num_bad_ratio (output) int
 *               The number of tests fail; they are above the threshold.
 *
 * num_tests (output) int
 *           The number of tests actually performed (skipped cases are
 *           not counted).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  Returns 0.0 (with all outputs zeroed) when
 * n == 0 or ntests == 0.  Negative n or ntests is reported through
 * BLAS_error.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- executed twice if debug is three (see above),
 *              -- otherwise executed once
 *    norm loop   -- norm = -1, 0, 1; in this variant norm is only
 *                  -- printed, the generated inputs do not use it
 *      numtest loop  -- how many times the test is performed with 
 *                      -- above set of attributes
 *        incx loop     -- varying incx: -2, -1, 1, 2
 *          incy loop     -- varying incy: -2, -1, 1, 2
 *            incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zwaxpby_z_d";

  /* max number of failure reports to print (the test below uses <=,
     so max_print + 1 reports can get through) */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val,	/* for testing different inc values */
    incx, incy, incw, gen_val, test_val;
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of tests actually run */
  int norm;			/* input values of near underflow/one/overflow */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double ratio;			/* the per-use test ratio from test() */
  double new_ratio;
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-BITS_D) here */
  double un_int;		/* the internal underflow threshold */
  double x_i[2];
  double y_i;
  double alpha[2];
  double beta[2];
  double *x;
  double *y;
  double *w;			/* the w computed  by BLAS_zwaxpby_z_d */
  double x_fix1[2];
  double x_fix2;
  double zero[2];
  double one[2];
  double dummy[2];

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *x_gen;
  double *y_gen;
  double *temp_ab;		/* allocated and freed, not otherwise used here */
  double *temp_xy;		/* allocated and freed, not otherwise used here */


  /* added by DY */
  double x_genj[2];
  double y_genj;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;
  int iymax, ixmax;
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the true w, stored as head and tail parts (the tail is always
     set to zero in this routine) */
  double *head_w_true, *tail_w_true;


  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count numbers the generated input sets;
				   old_count is the set last reported, so a
				   set is printed at most once */

  FPU_FIX_DECL;

  /* These are here to get rid of compiler warnings.
     Should modify M4 code to not even produce these variables when not
     needed. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;
  x_i[0] = x_i[1] = 0.0;
  y_i = 0.0;
  X = 0.0;
  X_int = 0;
  gen_val = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* strides of the generated (unit increment) data: x_gen and w_true
     are complex and so take two doubles per element, y_gen is real */
  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;
  incw_gen *= 2;
  incx_gen *= 2;


  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (2 > 0 && temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (2 > 0 && temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;
  x_fix2 = 1.0;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* replay exactly the same random sequence on the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */


    eps_int = power(2, -BITS_D);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
    prec = blas_prec_double;

    /* values near underflow, 1, or overflow */
    for (norm = -1; norm <= 1; norm++) {

      /* number of tests */
      for (i = 0; i < ntests; i++) {

	/* generate test inputs: a cancellation case built from a random
	   integer X (assuming power(2, 12) is 2^12 and xrand() returns
	   a value in [0, 1), X lies in [0, 2^12 - 1] -- confirm).  With
	     alpha = (1 + i) X^4 / 2^48,            x_j = i X^2 / 2^24,
	     beta = (-1 + i)(X^4 + X^2 + 1) / 2^48, y_j = -(X^2 - 1) / 2^24
	   the exact value of alpha * x_j + beta * y_j is
	   (-1 + i) / 2^72, which is what head_w_true is filled with. */
	X = xrand(seed);
	X_int = X * (power(2, 12) - 1);
	X = X_int;

	alpha[0] = X * X * X * X / power(2, 48);
	alpha[1] = X * X * X * X / power(2, 48);
	x_i[0] = 0.0;
	x_i[1] = X * X / power(2, 24);

	beta[0] = -(X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	y_i = -(X * X - 1) / power(2, 24);


	/* every element of x_gen, y_gen and w_true gets the same value */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i[0];
	  x_gen[1 + xgen_val] = x_i[1];
	  y_gen[ygen_val] = y_i;
	  head_w_true[wgen_val] = -1.0 / power(2, 72);
	  head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;


	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  /* setting incx; doubled because x is complex and is indexed
	     here as an array of doubles */
	  incx = incx_val;
	  incx *= 2;

	  /* set x starting index */
	  ix = 0;
	  if (incx < 0)
	    ix = -(n - 1) * incx;

	  /* copy x_gen to x */
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x_genj[0] = x_gen[j];
	    x_genj[1] = x_gen[1 + j];
	    x[ix] = x_genj[0];
	    x[1 + ix] = x_genj[1];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    /* setting incy */
	    incy = incy_val;


	    /* set y starting index */
	    iy = 0;
	    if (incy < 0)
	      iy = -(n - 1) * incy;

	    /* copy y_gen to y */
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y_genj = y_gen[j];
	      y[iy] = y_genj;
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      /* setting incw; doubled because w is complex and is
	         indexed here as an array of doubles */
	      incw = incw_val;
	      incw *= 2;

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby_z_d to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby_z_d(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* computing the ratio */
	      ix = 0;
	      if (incx < 0)
		ix = -(n - 1) * incx;
	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;
	      iw = 0;
	      if (incw < 0)
		iw = -(n - 1) * incw;
	      ratio = 0.0;

	      /* Each w_j is checked as a length-1 dot product; the
	         scalars are passed as (beta, alpha) with x_j in the
	         slot after them and the pair (x_fix2 = 1, y_j) as the
	         vectors -- presumably so that the checker evaluates
	         beta * (1 * y_j) + alpha * x_j; confirm against the
	         test_BLAS_zdot_d_d prototype. */
	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		test_BLAS_zdot_d_d(1, blas_no_conj, beta, alpha, &x[ix],
				   &w[iw], &head_w_true[test_val],
				   &tail_w_true[test_val], &x_fix2, incy,
				   &y[iy], incy, eps_int, un_int, &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;
		if (MAX(ratio, new_ratio) == new_ratio) {
		  iymax = iy - incy;
		  ixmax = ix - incx;
		}
		/* the case ratio is the worst element ratio */
		ratio = MAX(ratio, new_ratio);
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  ix = 0;
		  iy = 0;
		  iw = 0;
		  if (incx < 0)
		    ix = -(n - 1) * incx;
		  if (incy < 0)
		    iy = -(n - 1) * incy;
		  if (incw < 0)
		    iw = -(n - 1) * incw;

		  /* one "x; y; w; " triple per element */
		  for (j = 0; j < n; j++) {
		    printf("      ");
		    printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
		    printf("; ");
		    printf("%24.16e", y[iy]);
		    printf("; ");
		    printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
		    printf("; ");
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf("      ");
		  printf("alpha = ");
		  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		  printf("; ");
		  printf("beta = ");
		  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		  printf("\n");
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }
	      /* statistics are gathered on the first pass only, so the
	         debug=3 replay does not count cases twice */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    /* Every case may have been skipped at random, leaving
       tot_tests == 0; report 0.00 instead of computing 0/0. */
    double bad_fraction = 0.0;
    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_z_d */

double do_test_zwaxpby_d_z(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_d_z.  In this variant x is stored
 * as one double per element, while y, w, alpha and beta are stored as
 * (real, imaginary) pairs of doubles.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of the vectors being tested.
 *
 * ntests    (input) int
 *           The number of data sets generated for each value of norm.
 *
 * seed      (input/output) int
 *           The seed handed to xrand().  When debug == 3 it is reset to its
 *           entry value before the second pass.
 *
 * thresh    (input) double
 *           A case counts as bad when !(ratio <= thresh); written this way
 *           so that a NaN ratio is counted as bad too.
 *
 * debug     (input) int
 *           debug == 3: the whole sweep is executed twice.  The first pass
 *                       records the maximum ratio; the second pass prints a
 *                       FAIL report for bad cases whose ratio exceeds
 *                       0.5 * (maximum ratio), at most once per generated
 *                       data set.
 *           debug == 2: print the one-line summary.
 *           debug == 1: print the one-line summary only if there was at
 *                       least one bad ratio.
 *           otherwise : print nothing.
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is executed only when
 *           xrand(seed) < test_prob.
 *
 * min_ratio (output) double
 *           The smallest non-zero ratio seen in the first pass.
 *           NOTE: the internal 1e308 "nothing seen" sentinel is replaced by
 *           0.0 only when the summary is printed; otherwise it is returned
 *           unchanged.
 *
 * num_bad_ratio (output) int
 *           The number of bad cases counted in the last pass.
 *
 * num_tests (output) int
 *           The number of cases executed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass.  When n == 0 or ntests == 0
 * nothing is run: the three outputs are set to zero and 0.0 is returned.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- one pass, or two passes when debug == 3
 *    norm loop   -- norm = -1, 0, 1 (only used in the FAIL report here)
 *      test loop   -- ntests generated data sets
 *        incx loop   -- incx: -2, -1, 1, 2
 *          incy loop   -- incy: -2, -1, 1, 2
 *            incw loop   -- incw: -2, -1, 1, 2
 */
{
  /* function name, used in diagnostics */
  const char fname[] = "BLAS_zwaxpby_d_z";

  /* FAIL reports stop once p_count exceeds this value */
  const int max_print = 32;

  /* strides (in doubles) of the generated, densely packed data:
     one double per x element, two per y and w element */
  const int incx_gen = 1;
  const int incy_gen = 2;
  const int incw_gen = 2;

  int i;			/* iterates over the generated data sets */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* indices (in doubles) into x, y, w */
  int incx_val, incy_val, incw_val;	/* increments handed to the BLAS routine */
  int incx, incy, incw;		/* the same increments, counted in doubles */
  int xgen_val, ygen_val, wgen_val;	/* indices into the generated data */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* index of the current pass */
  int find_max_ratio;		/* 1 only if debug == 3, i.e. two passes */
  int p_count;			/* number of FAIL reports printed so far */
  int tot_tests;		/* number of cases executed in the first pass */
  int norm;
  int X_int;
  int bad_ratios;		/* number of ratios over the threshold */
  int saved_seed;		/* seed on entry, for replaying the sweep */
  int count, old_count;		/* identify the data set last reported */
  double X;
  double ratio_max;
  double ratio_min;
  double ratio;			/* worst element ratio of the current case */
  double new_ratio;		/* ratio of a single element */
  double eps_int;
  double un_int;
  double x_i;
  double y_i[2];
  double alpha[2];
  double beta[2];
  double x_fix1 = 1.0;		/* unit factor paired with each x element */
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_zwaxpby_d_z */
  double *x_gen;
  double *y_gen;
  double *head_w_true, *tail_w_true;	/* expected w, as head/tail pairs */
  enum blas_prec_type prec;

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  find_max_ratio = (debug == 3) ? 1 : 0;

  /* The debug iteration:
     If debug == 3 the sweep is executed twice.  First, compute the max
     ratio.  Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* recounted on every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* replay exactly the same sweep */

    eps_int = power(2, -BITS_D);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
    prec = blas_prec_double;

    for (norm = -1; norm <= 1; norm++) {

      for (i = 0; i < ntests; i++) {

	/* Generate test inputs from one random integer X.  With
	   power(2, k) taken as 2^k, alpha * x_i + beta * y_i works out to
	   (-1 + i) / 2^72, which is the value stored in head_w_true. */
	X = xrand(seed);
	X_int = X * (power(2, 12) - 1);
	X = X_int;

	alpha[0] = -X * X * X * X / power(2, 48);
	alpha[1] = X * X * X * X / power(2, 48);
	x_i = X * X / power(2, 24);

	beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	y_i[0] = 0.0;
	y_i[1] = -(X * X - 1) / power(2, 24);

	/* every element of the generated vectors carries the same value */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i;
	  y_gen[ygen_val] = y_i[0];
	  y_gen[1 + ygen_val] = y_i[1];
	  head_w_true[wgen_val] = -1.0 / power(2, 72);
	  head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  incx = incx_val;	/* x holds one double per element */

	  /* scatter x_gen into x with stride incx */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    incy = 2 * incy_val;	/* y holds two doubles per element */

	    /* scatter y_gen into y with stride incy */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      y[1 + iy] = y_gen[1 + j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      incw = 2 * incw_val;	/* w holds two doubles per element */

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby_d_z to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby_d_z(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* compute the ratio element by element and keep the worst */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		test_BLAS_zdot_d_d(1, blas_no_conj, alpha, beta, &y[iy],
				   &w[iw], &head_w_true[test_val],
				   &tail_w_true[test_val], &x_fix1, incx,
				   &x[ix], incx, eps_int, un_int, &new_ratio);
		ix += incx;
		iy += incy;
		iw += incw;
		ratio = MAX(ratio, new_ratio);
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* data set not reported yet */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  default:
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  default:
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  /* dump x; y; w element by element */
		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  for (j = 0; j < n; j++) {
		    printf
		      ("      %24.16e; (%24.16e, %24.16e); (%24.16e, %24.16e); ",
		       x[ix], y[iy], y[iy + 1], w[iw], w[iw + 1]);
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf
		    ("      alpha = (%24.16e, %24.16e); beta = (%24.16e, %24.16e)\n",
		     alpha[0], alpha[1], beta[0], beta[1]);
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered in the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    /* every case may have been skipped by test_prob, so guard the
       division instead of printing 0/0 */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_d_z */

double do_test_zwaxpby_d_d(int n,
			   int ntests,
			   int *seed,
			   double thresh,
			   int debug, float test_prob,
			   double *min_ratio,
			   int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_d_d.  In this variant x and y are
 * stored as one double per element, while w, alpha and beta are stored as
 * (real, imaginary) pairs of doubles.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of the vectors being tested.
 *
 * ntests    (input) int
 *           The number of data sets generated for each value of norm.
 *
 * seed      (input/output) int
 *           The seed handed to xrand().  When debug == 3 it is reset to its
 *           entry value before the second pass.
 *
 * thresh    (input) double
 *           A case counts as bad when !(ratio <= thresh); written this way
 *           so that a NaN ratio is counted as bad too.
 *
 * debug     (input) int
 *           debug == 3: the whole sweep is executed twice.  The first pass
 *                       records the maximum ratio; the second pass prints a
 *                       FAIL report for bad cases whose ratio exceeds
 *                       0.5 * (maximum ratio), at most once per generated
 *                       data set.
 *           debug == 2: print the one-line summary.
 *           debug == 1: print the one-line summary only if there was at
 *                       least one bad ratio.
 *           otherwise : print nothing.
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is executed only when
 *           xrand(seed) < test_prob.
 *
 * min_ratio (output) double
 *           The smallest non-zero ratio seen in the first pass.
 *           NOTE: the internal 1e308 "nothing seen" sentinel is replaced by
 *           0.0 only when the summary is printed; otherwise it is returned
 *           unchanged.
 *
 * num_bad_ratio (output) int
 *           The number of bad cases counted in the last pass.
 *
 * num_tests (output) int
 *           The number of cases executed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass.  When n == 0 or ntests == 0
 * nothing is run: the three outputs are set to zero and 0.0 is returned.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- one pass, or two passes when debug == 3
 *    norm loop   -- norm = -1, 0, 1 (only used in the FAIL report here)
 *      test loop   -- ntests generated data sets
 *        incx loop   -- incx: -2, -1, 1, 2
 *          incy loop   -- incy: -2, -1, 1, 2
 *            incw loop   -- incw: -2, -1, 1, 2
 */
{
  /* function name, used in diagnostics */
  const char fname[] = "BLAS_zwaxpby_d_d";

  /* FAIL reports stop once p_count exceeds this value */
  const int max_print = 32;

  /* strides (in doubles) of the generated, densely packed data:
     one double per x and y element, two per w element */
  const int incx_gen = 1;
  const int incy_gen = 1;
  const int incw_gen = 2;

  int i;			/* iterates over the generated data sets */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* indices (in doubles) into x, y, w */
  int incx_val, incy_val, incw_val;	/* increments handed to the BLAS routine */
  int incx, incy, incw;		/* the same increments, counted in doubles */
  int xgen_val, ygen_val, wgen_val;	/* indices into the generated data */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* index of the current pass */
  int find_max_ratio;		/* 1 only if debug == 3, i.e. two passes */
  int p_count;			/* number of FAIL reports printed so far */
  int tot_tests;		/* number of cases executed in the first pass */
  int norm;
  int X_int;
  int bad_ratios;		/* number of ratios over the threshold */
  int saved_seed;		/* seed on entry, for replaying the sweep */
  int count, old_count;		/* identify the data set last reported */
  double X;
  double ratio_max;
  double ratio_min;
  double ratio;			/* worst element ratio of the current case */
  double new_ratio;		/* ratio of a single element */
  double eps_int;
  double un_int;
  double x_i;
  double y_i;
  double alpha[2];
  double beta[2];
  double zero[2] = { 0.0, 0.0 };
  double one[2] = { 1.0, 0.0 };
  double dummy[2] = { 0.0, 0.0 };
  double temp_ab[4];		/* (alpha, beta) packed as two complex values */
  double temp_xy[2];		/* (x element, y element) */
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_zwaxpby_d_d */
  double *x_gen;
  double *y_gen;
  double *head_w_true, *tail_w_true;	/* expected w, as head/tail pairs */
  enum blas_prec_type prec;

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w are sized for |inc| up to 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  find_max_ratio = (debug == 3) ? 1 : 0;

  /* The debug iteration:
     If debug == 3 the sweep is executed twice.  First, compute the max
     ratio.  Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* recounted on every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* replay exactly the same sweep */

    eps_int = power(2, -BITS_D);
    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
    prec = blas_prec_double;

    for (norm = -1; norm <= 1; norm++) {

      for (i = 0; i < ntests; i++) {

	/* Generate test inputs from one random integer X.  With
	   power(2, k) taken as 2^k, alpha * x_i + beta * y_i works out to
	   (-1 + i) / 2^72, which is the value stored in head_w_true. */
	X = xrand(seed);
	X_int = X * (power(2, 12) - 1);
	X = X_int;

	alpha[0] = -X * X * X * X / power(2, 48);
	alpha[1] = X * X * X * X / power(2, 48);
	x_i = X * X / power(2, 24);

	beta[0] = -(X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	y_i = -(X * X - 1) / power(2, 24);

	/* every element of the generated vectors carries the same value */
	xgen_val = 0;
	ygen_val = 0;
	for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	  x_gen[xgen_val] = x_i;
	  y_gen[ygen_val] = y_i;
	  head_w_true[wgen_val] = -1.0 / power(2, 72);
	  head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	  tail_w_true[wgen_val] = 0.0;
	  tail_w_true[wgen_val + 1] = 0.0;
	  xgen_val += incx_gen;
	  ygen_val += incy_gen;
	}

	count++;

	/* varying incx */
	for (incx_val = -2; incx_val <= 2; incx_val++) {
	  if (incx_val == 0)
	    continue;

	  incx = incx_val;	/* x holds one double per element */

	  /* scatter x_gen into x with stride incx */
	  ix = (incx < 0) ? -(n - 1) * incx : 0;
	  for (j = 0; j < n * incx_gen; j += incx_gen) {
	    x[ix] = x_gen[j];
	    ix += incx;
	  }

	  /* varying incy */
	  for (incy_val = -2; incy_val <= 2; incy_val++) {
	    if (incy_val == 0)
	      continue;

	    incy = incy_val;	/* y holds one double per element */

	    /* scatter y_gen into y with stride incy */
	    iy = (incy < 0) ? -(n - 1) * incy : 0;
	    for (j = 0; j < n * incy_gen; j += incy_gen) {
	      y[iy] = y_gen[j];
	      iy += incy;
	    }

	    /* varying incw */
	    for (incw_val = -2; incw_val <= 2; incw_val++) {
	      if (incw_val == 0)
		continue;

	      incw = 2 * incw_val;	/* w holds two doubles per element */

	      /* For the sake of speed, we throw out this case at random */
	      if (xrand(seed) >= test_prob)
		continue;

	      /* call BLAS_zwaxpby_d_d to get w */
	      FPU_FIX_STOP;
	      BLAS_zwaxpby_d_d(n, alpha, x, incx_val, beta, y, incy_val,
			       w, incw_val);
	      FPU_FIX_START;

	      /* compute the ratio element by element and keep the worst */
	      ix = (incx < 0) ? -(n - 1) * incx : 0;
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      iw = (incw < 0) ? -(n - 1) * incw : 0;
	      ratio = 0.0;

	      /* each w element is checked as the length-2 dot product
	         (alpha, beta) . (x element, y element) */
	      temp_ab[0] = alpha[0];
	      temp_ab[0 + 1] = alpha[1];
	      temp_ab[incw_gen] = beta[0];
	      temp_ab[incw_gen + 1] = beta[1];

	      for (test_val = 0; test_val < n * incw_gen;
		   test_val += incw_gen) {
		temp_xy[0] = x[ix];
		temp_xy[incy_gen] = y[iy];

		test_BLAS_zdot_z_d(2, blas_no_conj, one, zero,
				   dummy, &w[iw],
				   &head_w_true[test_val],
				   &tail_w_true[test_val], &temp_ab[0], 1,
				   &temp_xy[0], 1, eps_int, un_int,
				   &new_ratio);
		ratio = MAX(ratio, new_ratio);

		ix += incx;
		iy += incy;
		iw += incw;
	      }

	      /* Increase the number of bad ratio, if the ratio
	         is bigger than the threshold.
	         The !<= below causes NaN error to be detected.
	         Note that (NaN > thresh) is always false. */
	      if (!(ratio <= thresh)) {
		bad_ratios++;

		if ((debug == 3) &&	/* print only when debug is on */
		    (count != old_count) &&	/* data set not reported yet */
		    (d_count == find_max_ratio) &&
		    (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		  old_count = count;

		  printf
		    ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		     fname, n, ntests, thresh);
		  printf("seed = %d\n", *seed);
		  printf("norm = %d\n", norm);

		  /* Print test info */
		  switch (prec) {
		  case blas_prec_single:
		    printf("single ");
		    break;
		  case blas_prec_double:
		    printf("double ");
		    break;
		  case blas_prec_indigenous:
		    printf("indigenous ");
		    break;
		  case blas_prec_extra:
		    printf("extra ");
		    break;
		  default:
		    break;
		  }
		  switch (norm) {
		  case -1:
		    printf("near_underflow ");
		    break;
		  case 0:
		    printf("near_one ");
		    break;
		  case 1:
		    printf("near_overflow ");
		    break;
		  default:
		    break;
		  }

		  printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		  /* dump x; y; w element by element */
		  ix = (incx < 0) ? -(n - 1) * incx : 0;
		  iy = (incy < 0) ? -(n - 1) * incy : 0;
		  iw = (incw < 0) ? -(n - 1) * incw : 0;

		  for (j = 0; j < n; j++) {
		    printf("      %24.16e; %24.16e; (%24.16e, %24.16e); ",
			   x[ix], y[iy], w[iw], w[iw + 1]);
		    ix += incx;
		    iy += incy;
		    iw += incw;
		  }

		  printf
		    ("      alpha = (%24.16e, %24.16e); beta = (%24.16e, %24.16e)\n",
		     alpha[0], alpha[1], beta[0], beta[1]);
		  printf("      ratio=%.4e\n", ratio);
		  p_count++;
		}
	      }

	      /* statistics are gathered in the first pass only */
	      if (d_count == 0) {

		if (ratio > ratio_max)
		  ratio_max = ratio;

		if (ratio != 0.0 && ratio < ratio_min)
		  ratio_min = ratio;

		tot_tests++;
	      }
	    }			/* incw */
	  }			/* incy */
	}			/* incx */
      }				/* tests */
    }				/* norm */

  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    /* every case may have been skipped by test_prob, so guard the
       division instead of printing 0/0 */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_d_d */

double do_test_swaxpby_x(int n,
			 int ntests,
			 int *seed,
			 double thresh,
			 int debug, float test_prob,
			 double *min_ratio,
			 int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_swaxpby_x and reports the error ratios
 * computed by test_BLAS_sdot for every element of the result vector w.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested.
 *
 * ntests    (input) int
 *           The number of input sets generated for each (prec, norm) pair.
 *
 * seed      (input/output) int
 *           The seed handed to BLAS_sdot_testgen() and xrand().  When
 *           debug=3 it is reset to its entry value before the second
 *           (reporting) pass so that pass replays the same inputs.
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh); written this
 *           way so that a NaN ratio is counted as bad as well.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass finds the
 *                       max ratio, the second pass prints full details of
 *                       failing cases whose ratio > 0.5 * max ratio
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is skipped when
 *           xrand(seed) >= test_prob.
 *
 * min_ratio (output) double
 *           The smallest non-zero ratio recorded in the first pass.  If no
 *           such ratio was recorded the value is the sentinel 1e308, except
 *           when the summary is printed, where the sentinel is replaced
 *           by 0.0.  Set to 0.0 when n == 0 or ntests == 0.
 *
 * num_bad_ratio (output) int
 *               The number of bad cases counted in the last pass.
 *
 * num_tests (output) int
 *           The number of cases actually run (not skipped) in the first
 *           pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass; 0.0 when n == 0 or
 * ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    prec loop   -- varying internal prec: single, double, or extra
 *      norm loop   -- varying norm: -1, 0, 1 (printed as near_underflow,
 *                  -- near_one, near_overflow)
 *        numtest loop  -- ntests freshly generated input sets
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_swaxpby_x";

  /* cap on failure reports; the check below is p_count <= max_print */
  const int max_print = 32;

  int i;			/* iterates over the repeated tests */
  int j;			/* element counter */
  int ix, iy, iw;		/* indices into x, y, w respectively */
  int incx, incy, incw;		/* strides under test */
  int d_count;			/* index of the current debug pass */
  int find_max_ratio;		/* 1 only if debug = 3 */
  int p_count;			/* number of failure reports printed */
  int tot_tests;		/* number of cases run in the first pass */
  int norm;			/* norm selector passed to testgen */
  int prec_val;			/* loop variable selecting prec */
  int bad_ratios;		/* number of ratios over the threshold */
  int saved_seed;		/* the seed on entry */
  int count, old_count;		/* id of the current / last reported input set */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum non-zero ratio */
  double ratio;			/* the largest element ratio of one case */
  double new_ratio;		/* ratio of a single element */
  double bad_frac;		/* bad/total fraction for the summary */
  double eps_int;		/* the internal epsilon expected */
  double un_int;		/* the internal underflow threshold */
  float alpha;
  float beta;
  float x_fix1;			/* constant 1 handed to testgen/test as a
				   length-1 vector; NOTE(review): presumably the
				   fixed factor that turns each element of
				   alpha*x + beta*y into a length-1 dot
				   product -- confirm against BLAS_sdot_testgen */
  float *x;
  float *y;
  float *w;			/* the w computed by BLAS_swaxpby_x */
  float *x_gen;			/* generated x values, unit stride */
  float *y_gen;			/* generated y values, unit stride */
  double *head_w_true, *tail_w_true;	/* reference w from testgen, as
					   head/tail pairs */
  enum blas_prec_type prec;

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w hold 2*n elements so that
     strides of +-2 fit */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  find_max_ratio = (debug == 3) ? 1 : 0;
  x_fix1 = 1.0;

  /* The debug iteration:
     If debug=3, the sweep is executed twice.  The first pass computes the
     max ratio; the second pass prints info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying internal precision */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;
	break;
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* norm selector: -1, 0, 1 */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* Generate test inputs, one element per testgen call.  The first
	     call passes flag 0 for alpha and beta, the remaining calls pass
	     flag 1.  NOTE(review): presumably 0 lets testgen choose alpha
	     and beta and 1 makes it reuse them -- confirm against
	     BLAS_sdot_testgen. */
	  BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
			    &alpha, 0, &beta, 0,
			    &x_fix1, &x_gen[0], seed,
			    &y_gen[0], &head_w_true[0], &tail_w_true[0]);

	  for (j = 1; j < n; j++) {
	    BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta,
			      1, &x_fix1, &x_gen[j], seed,
			      &y_gen[j], &head_w_true[j], &tail_w_true[j]);
	  }

	  count++;

	  /* varying incx */
	  for (incx = -2; incx <= 2; incx++) {
	    if (incx == 0)
	      continue;

	    /* copy x_gen to x, starting from the far end for incx < 0 */
	    ix = (incx < 0) ? -(n - 1) * incx : 0;
	    for (j = 0; j < n; j++) {
	      x[ix] = x_gen[j];
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy = -2; incy <= 2; incy++) {
	      if (incy == 0)
		continue;

	      /* copy y_gen to y, starting from the far end for incy < 0 */
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      for (j = 0; j < n; j++) {
		y[iy] = y_gen[j];
		iy += incy;
	      }

	      /* varying incw */
	      for (incw = -2; incw <= 2; incw++) {
		if (incw == 0)
		  continue;

		/* For the sake of speed, we throw out this case at random */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_swaxpby_x to get w */
		FPU_FIX_STOP;
		BLAS_swaxpby_x(n, alpha, x, incx, beta, y, incy,
			       w, incw, prec);
		FPU_FIX_START;

		/* computing the ratio: the largest element ratio */
		ix = (incx < 0) ? -(n - 1) * incx : 0;
		iy = (incy < 0) ? -(n - 1) * incy : 0;
		iw = (incw < 0) ? -(n - 1) * incw : 0;
		ratio = 0.0;

		for (j = 0; j < n; j++) {
		  test_BLAS_sdot(1, blas_no_conj, alpha, beta, y[iy], w[iw],
				 head_w_true[j], tail_w_true[j],
				 &x_fix1, incx, &x[ix], incx, eps_int, un_int,
				 &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;
		  ratio = MAX(ratio, new_ratio);
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  /* Report only in the second pass of debug=3, at most once
		     per generated input set, while the print cap is not
		     exceeded, and only for the worse half of the ratios. */
		  if ((debug == 3) &&
		      (count != old_count) &&
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    /* dump x, y and w element by element */
		    ix = (incx < 0) ? -(n - 1) * incx : 0;
		    iy = (incy < 0) ? -(n - 1) * incy : 0;
		    iw = (incw < 0) ? -(n - 1) * incw : 0;

		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("%16.8e", x[ix]);
		      printf("; ");
		      printf("%16.8e", y[iy]);
		      printf("; ");
		      printf("%16.8e", w[iw]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("%16.8e", alpha);
		    printf("; ");
		    printf("beta = ");
		    printf("%16.8e", beta);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}

		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* every case may have been skipped at random; avoid printing 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_swaxpby_x */

double do_test_dwaxpby_x(int n,
			 int ntests,
			 int *seed,
			 double thresh,
			 int debug, float test_prob,
			 double *min_ratio,
			 int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dwaxpby_x and reports the error ratios
 * computed by test_BLAS_ddot for every element of the result vector w.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested.
 *
 * ntests    (input) int
 *           The number of input sets generated for each (prec, norm) pair.
 *
 * seed      (input/output) int
 *           The seed handed to BLAS_ddot_testgen() and xrand().  When
 *           debug=3 it is reset to its entry value before the second
 *           (reporting) pass so that pass replays the same inputs.
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh); written this
 *           way so that a NaN ratio is counted as bad as well.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass finds the
 *                       max ratio, the second pass prints full details of
 *                       failing cases whose ratio > 0.5 * max ratio
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is skipped when
 *           xrand(seed) >= test_prob.
 *
 * min_ratio (output) double
 *           The smallest non-zero ratio recorded in the first pass.  If no
 *           such ratio was recorded the value is the sentinel 1e308, except
 *           when the summary is printed, where the sentinel is replaced
 *           by 0.0.  Set to 0.0 when n == 0 or ntests == 0.
 *
 * num_bad_ratio (output) int
 *               The number of bad cases counted in the last pass.
 *
 * num_tests (output) int
 *           The number of cases actually run (not skipped) in the first
 *           pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass; 0.0 when n == 0 or
 * ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    prec loop   -- three iterations: double, double, extra
 *      norm loop   -- varying norm: -1, 0, 1 (printed as near_underflow,
 *                  -- near_one, near_overflow)
 *        numtest loop  -- ntests freshly generated input sets
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_x";

  /* cap on failure reports; the check below is p_count <= max_print */
  const int max_print = 32;

  int i;			/* iterates over the repeated tests */
  int j;			/* element counter */
  int ix, iy, iw;		/* indices into x, y, w respectively */
  int incx, incy, incw;		/* strides under test */
  int d_count;			/* index of the current debug pass */
  int find_max_ratio;		/* 1 only if debug = 3 */
  int p_count;			/* number of failure reports printed */
  int tot_tests;		/* number of cases run in the first pass */
  int norm;			/* norm selector passed to testgen */
  int prec_val;			/* loop variable selecting prec */
  int bad_ratios;		/* number of ratios over the threshold */
  int saved_seed;		/* the seed on entry */
  int count, old_count;		/* id of the current / last reported input set */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum non-zero ratio */
  double ratio;			/* the largest element ratio of one case */
  double new_ratio;		/* ratio of a single element */
  double bad_frac;		/* bad/total fraction for the summary */
  double eps_int;		/* the internal epsilon expected */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  double x_fix1;		/* constant 1 handed to testgen/test as a
				   length-1 vector; NOTE(review): presumably the
				   fixed factor that turns each element of
				   alpha*x + beta*y into a length-1 dot
				   product -- confirm against BLAS_ddot_testgen */
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_dwaxpby_x */
  double *x_gen;		/* generated x values, unit stride */
  double *y_gen;		/* generated y values, unit stride */
  double *head_w_true, *tail_w_true;	/* reference w from testgen, as
					   head/tail pairs */
  enum blas_prec_type prec;

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* get space for calculation; x, y and w hold 2*n elements so that
     strides of +-2 fit */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  find_max_ratio = (debug == 3) ? 1 : 0;
  x_fix1 = 1.0;

  /* The debug iteration:
     If debug=3, the sweep is executed twice.  The first pass computes the
     max ratio; the second pass prints info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying internal precision */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
	/* prec_val 0 and 1 both select blas_prec_double, so the double
	   setting is swept twice (with different random inputs) */
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* norm selector: -1, 0, 1 */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* Generate test inputs, one element per testgen call.  The first
	     call passes flag 0 for alpha and beta, the remaining calls pass
	     flag 1.  NOTE(review): presumably 0 lets testgen choose alpha
	     and beta and 1 makes it reuse them -- confirm against
	     BLAS_ddot_testgen. */
	  BLAS_ddot_testgen(1, 0, 1, norm, blas_no_conj,
			    &alpha, 0, &beta, 0,
			    &x_fix1, &x_gen[0], seed,
			    &y_gen[0], &head_w_true[0], &tail_w_true[0]);

	  for (j = 1; j < n; j++) {
	    BLAS_ddot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta,
			      1, &x_fix1, &x_gen[j], seed,
			      &y_gen[j], &head_w_true[j], &tail_w_true[j]);
	  }

	  count++;

	  /* varying incx */
	  for (incx = -2; incx <= 2; incx++) {
	    if (incx == 0)
	      continue;

	    /* copy x_gen to x, starting from the far end for incx < 0 */
	    ix = (incx < 0) ? -(n - 1) * incx : 0;
	    for (j = 0; j < n; j++) {
	      x[ix] = x_gen[j];
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy = -2; incy <= 2; incy++) {
	      if (incy == 0)
		continue;

	      /* copy y_gen to y, starting from the far end for incy < 0 */
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      for (j = 0; j < n; j++) {
		y[iy] = y_gen[j];
		iy += incy;
	      }

	      /* varying incw */
	      for (incw = -2; incw <= 2; incw++) {
		if (incw == 0)
		  continue;

		/* For the sake of speed, we throw out this case at random */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_dwaxpby_x to get w */
		FPU_FIX_STOP;
		BLAS_dwaxpby_x(n, alpha, x, incx, beta, y, incy,
			       w, incw, prec);
		FPU_FIX_START;

		/* computing the ratio: the largest element ratio */
		ix = (incx < 0) ? -(n - 1) * incx : 0;
		iy = (incy < 0) ? -(n - 1) * incy : 0;
		iw = (incw < 0) ? -(n - 1) * incw : 0;
		ratio = 0.0;

		for (j = 0; j < n; j++) {
		  test_BLAS_ddot(1, blas_no_conj, alpha, beta, y[iy], w[iw],
				 head_w_true[j], tail_w_true[j],
				 &x_fix1, incx, &x[ix], incx, eps_int, un_int,
				 &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;
		  ratio = MAX(ratio, new_ratio);
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  /* Report only in the second pass of debug=3, at most once
		     per generated input set, while the print cap is not
		     exceeded, and only for the worse half of the ratios. */
		  if ((debug == 3) &&
		      (count != old_count) &&
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    /* dump x, y and w element by element */
		    ix = (incx < 0) ? -(n - 1) * incx : 0;
		    iy = (incy < 0) ? -(n - 1) * incy : 0;
		    iw = (incw < 0) ? -(n - 1) * incw : 0;

		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("%24.16e", x[ix]);
		      printf("; ");
		      printf("%24.16e", y[iy]);
		      printf("; ");
		      printf("%24.16e", w[iw]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("%24.16e", alpha);
		    printf("; ");
		    printf("beta = ");
		    printf("%24.16e", beta);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}

		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* every case may have been skipped at random; avoid printing 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_dwaxpby_x */

double do_test_cwaxpby_x(int n,
			 int ntests,
			 int *seed,
			 double thresh,
			 int debug, float test_prob,
			 double *min_ratio,
			 int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_x (waxpby on single precision
 * complex vectors, with the internal precision passed as an argument).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is handed to
 *           BLAS_cdot_testgen() and to xrand().
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh); written
 *           this way so that a NaN ratio is counted as bad as well.
 *           (Since ratio is supposed to be O(1), we can set thresh
 *           to ~10.)
 *
 * debug     (input) int
 *           If debug=3, every case is run twice: the first pass finds
 *             the max ratio, the second pass (seed restored) prints
 *             complete info for failing cases whose
 *             ratio > 0.5 * max ratio
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           Otherwise nothing is printed
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio.  When no nonzero ratio was seen
 *           this is 1e308, or 0.0 if the summary was printed.
 * 
 * num_bad_ratio (output) int
 *               The number of cases with !(ratio <= thresh).
 *
 * num_tests (output) int
 *           The number of cases actually run (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  When n == 0 or ntests == 0 nothing is run: the
 * three outputs are set to zero and 0.0 is returned.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- executed twice if debug is three, once otherwise
 *    prec loop   -- varying internal prec: single, double, or extra
 *      norm loop   -- varying norm: near underflow, near one, or 
 *                    -- near overflow
 *        numtest loop  -- how many times the test is performed with 
 *                        -- above set of attributes; new alpha, beta,
 *                        -- x and y come from testgen on every iteration
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name, used in the error and report messages */
  const char fname[] = "BLAS_cwaxpby_x";

  /* limit on the number of failure reports; the guard below is
     p_count <= max_print, so up to max_print + 1 reports are printed */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* increments in complex elements */
  int incx, incy, incw;		/* the same increments in floats (2 * inc_val) */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of cases run */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* the largest per-element ratio of one case */
  double new_ratio;		/* the ratio of one element from test_BLAS_cdot() */
  double bad_fraction;		/* bad_ratios / tot_tests for the summary */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;			/* the w computed  by BLAS_cwaxpby_x */
  float x_fix1[2];		/* the constant (1, 0) handed to testgen/test */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *x_gen;
  float *y_gen;

  int incy_gen, incx_gen, incw_gen;	/* strides of the generated vectors */
  int xgen_val, ygen_val, wgen_val;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is bumped once per generated data set */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* generated vectors are contiguous; one complex element is two floats */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w hold n complex elements at
     an increment of up to 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* recounted from scratch in every pass */

    /* the second pass must see the same random sequence as the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying extra precs */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
        eps_int = power(2, -BITS_S);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
        prec = blas_prec_single;
        break;
      case 1:
        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;
        break;
      case 2:
      default:
        eps_int = power(2, -BITS_E);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
        prec = blas_prec_extra;
        break;
      }

      /* values near underflow, 1, or overflow */
      for (norm = -1; norm <= 1; norm++) {

        /* number of tests */
        for (i = 0; i < ntests; i++) {

          /* generate test inputs, one element per testgen call.
             NOTE(review): the 0 / 1 arguments that follow &alpha and
             &beta look like flags telling testgen to generate alpha and
             beta (0, first element) or to reuse the stored values
             (1, remaining elements) -- confirm against
             BLAS_cdot_testgen. */
          BLAS_cdot_testgen(1, 0, 1, norm, blas_no_conj,
                            &alpha, 0, &beta, 0,
                            &x_fix1, &x_gen[0], seed,
                            &y_gen[0], &head_w_true[0], &tail_w_true[0]);

          xgen_val = incx_gen;
          ygen_val = incy_gen;
          for (wgen_val = incw_gen; wgen_val < n * incw_gen;
               wgen_val += incw_gen) {
            BLAS_cdot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta,
                              1, &x_fix1, &x_gen[xgen_val], seed,
                              &y_gen[ygen_val], &head_w_true[wgen_val],
                              &tail_w_true[wgen_val]);
            xgen_val += incx_gen;
            ygen_val += incy_gen;
          }

          count++;

          /* varying incx */
          for (incx_val = -2; incx_val <= 2; incx_val++) {
            if (incx_val == 0)
              continue;

            /* setting incx: stride in floats */
            incx = 2 * incx_val;

            /* set x starting index */
            ix = 0;
            if (incx < 0)
              ix = -(n - 1) * incx;

            /* copy x_gen to x */
            for (j = 0; j < n * incx_gen; j += incx_gen) {
              x[ix] = x_gen[j];
              x[1 + ix] = x_gen[1 + j];
              ix += incx;
            }

            /* varying incy */
            for (incy_val = -2; incy_val <= 2; incy_val++) {
              if (incy_val == 0)
                continue;

              /* setting incy: stride in floats */
              incy = 2 * incy_val;

              /* set y starting index */
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;

              /* copy y_gen to y */
              for (j = 0; j < n * incy_gen; j += incy_gen) {
                y[iy] = y_gen[j];
                y[1 + iy] = y_gen[1 + j];
                iy += incy;
              }

              /* varying incw */
              for (incw_val = -2; incw_val <= 2; incw_val++) {
                if (incw_val == 0)
                  continue;

                /* setting incw: stride in floats */
                incw = 2 * incw_val;

                /* For the sake of speed, we throw out this case at random */
                if (xrand(seed) >= test_prob)
                  continue;

                /* call BLAS_cwaxpby_x to get w */
                FPU_FIX_STOP;
                BLAS_cwaxpby_x(n, alpha, x, incx_val, beta, y, incy_val,
                               w, incw_val, prec);
                FPU_FIX_START;

                /* computing the ratio */
                ix = 0;
                if (incx < 0)
                  ix = -(n - 1) * incx;
                iy = 0;
                if (incy < 0)
                  iy = -(n - 1) * incy;
                iw = 0;
                if (incw < 0)
                  iw = -(n - 1) * incw;
                ratio = 0.0;

                /* every element of w is checked as a dot product of
                   length one */
                for (test_val = 0; test_val < n * incw_gen;
                     test_val += incw_gen) {
                  test_BLAS_cdot(1, blas_no_conj, alpha, beta, &y[iy], &w[iw],
                                 &head_w_true[test_val],
                                 &tail_w_true[test_val], &x_fix1, incx,
                                 &x[ix], incx, eps_int, un_int, &new_ratio);
                  ix += incx;
                  iy += incy;
                  iw += incw;
                  /* NOTE(review): whether a NaN new_ratio survives the
                     following iterations depends on how MAX is
                     defined -- confirm. */
                  ratio = MAX(ratio, new_ratio);
                }

                /* Increase the number of bad ratio, if the ratio
                   is bigger than the threshold.
                   The !<= below causes NaN error to be detected.
                   Note that (NaN > thresh) is always false. */
                if (!(ratio <= thresh)) {
                  bad_ratios++;

                  if ((debug == 3) &&	/* print only when debug is on */
                      (count != old_count) &&	/* print if old vector is different 
                                                   from the current one */
                      (d_count == find_max_ratio) &&
                      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                    old_count = count;

                    printf
                      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                       fname, n, ntests, thresh);
                    printf("seed = %d\n", *seed);
                    printf("norm = %d\n", norm);

                    /* Print test info */
                    switch (prec) {
                    case blas_prec_single:
                      printf("single ");
                      break;
                    case blas_prec_double:
                      printf("double ");
                      break;
                    case blas_prec_indigenous:
                      printf("indigenous ");
                      break;
                    case blas_prec_extra:
                      printf("extra ");
                      break;
                    }
                    switch (norm) {
                    case -1:
                      printf("near_underflow ");
                      break;
                    case 0:
                      printf("near_one ");
                      break;
                    case 1:
                      printf("near_overflow ");
                      break;
                    }

                    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                    ix = 0;
                    iy = 0;
                    iw = 0;
                    if (incx < 0)
                      ix = -(n - 1) * incx;
                    if (incy < 0)
                      iy = -(n - 1) * incy;
                    if (incw < 0)
                      iw = -(n - 1) * incw;

                    /* one (x; y; w) triple per element, each complex
                       value printed as (real, imaginary) */
                    for (j = 0; j < n; j++) {
                      printf("      ");
                      printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
                      printf("; ");
                      printf("(%16.8e, %16.8e)", y[iy], y[iy + 1]);
                      printf("; ");
                      printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
                      printf("; ");
                      ix += incx;
                      iy += incy;
                      iw += incw;
                    }

                    printf("      ");
                    printf("alpha = ");
                    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
                    printf("; ");
                    printf("beta = ");
                    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
                    printf("\n");
                    printf("      ratio=%.4e\n", ratio);
                    p_count++;
                  }
                }

                /* the statistics are gathered in the first pass only, so
                   the second debug pass does not count anything twice */
                if (d_count == 0) {

                  if (ratio > ratio_max)
                    ratio_max = ratio;

                  if (ratio != 0.0 && ratio < ratio_min)
                    ratio_min = ratio;

                  tot_tests++;
                }
              }			/* incw */
            }			/* incy */
          }			/* incx */
        }			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* test_prob may have thrown out every case; report 0.00 then
       instead of dividing zero by zero */
    bad_fraction = 0.0;
    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_cwaxpby_x */

double do_test_zwaxpby_x(int n,
			 int ntests,
			 int *seed,
			 double thresh,
			 int debug, float test_prob,
			 double *min_ratio,
			 int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_x (waxpby on double precision
 * complex vectors, with the internal precision passed as an argument).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is handed to
 *           BLAS_zdot_testgen() and to xrand().
 *
 * thresh    (input) double
 *           A case is counted as bad when !(ratio <= thresh); written
 *           this way so that a NaN ratio is counted as bad as well.
 *           (Since ratio is supposed to be O(1), we can set thresh
 *           to ~10.)
 *
 * debug     (input) int
 *           If debug=3, every case is run twice: the first pass finds
 *             the max ratio, the second pass (seed restored) prints
 *             complete info for failing cases whose
 *             ratio > 0.5 * max ratio
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           Otherwise nothing is printed
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio.  When no nonzero ratio was seen
 *           this is 1e308, or 0.0 if the summary was printed.
 * 
 * num_bad_ratio (output) int
 *               The number of cases with !(ratio <= thresh).
 *
 * num_tests (output) int
 *           The number of cases actually run (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  When n == 0 or ntests == 0 nothing is run: the
 * three outputs are set to zero and 0.0 is returned.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- executed twice if debug is three, once otherwise
 *    prec loop   -- three iterations: double, double again, then extra
 *      norm loop   -- varying norm: near underflow, near one, or 
 *                    -- near overflow
 *        numtest loop  -- how many times the test is performed with 
 *                        -- above set of attributes; new alpha, beta,
 *                        -- x and y come from testgen on every iteration
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name, used in the error and report messages */
  const char fname[] = "BLAS_zwaxpby_x";

  /* limit on the number of failure reports; the guard below is
     p_count <= max_print, so up to max_print + 1 reports are printed */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;	/* increments in complex elements */
  int incx, incy, incw;		/* the same increments in doubles (2 * inc_val) */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of cases run */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* the largest per-element ratio of one case */
  double new_ratio;		/* the ratio of one element from test_BLAS_zdot() */
  double bad_fraction;		/* bad_ratios / tot_tests for the summary */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *x;
  double *y;
  double *w;			/* the w computed  by BLAS_zwaxpby_x */
  double x_fix1[2];		/* the constant (1, 0) handed to testgen/test */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *x_gen;
  double *y_gen;

  int incy_gen, incx_gen, incw_gen;	/* strides of the generated vectors */
  int xgen_val, ygen_val, wgen_val;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is bumped once per generated data set */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* generated vectors are contiguous; one complex element is two doubles */
  incw_gen = 2;
  incx_gen = 2;
  incy_gen = 2;

  /* get space for calculation; x, y and w hold n complex elements at
     an increment of up to 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* recounted from scratch in every pass */

    /* the second pass must see the same random sequence as the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying extra precs */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
        /* both of the first two iterations use the double settings */
        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;
        break;
      case 2:
      default:
        eps_int = power(2, -BITS_E);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
        prec = blas_prec_extra;
        break;
      }

      /* values near underflow, 1, or overflow */
      for (norm = -1; norm <= 1; norm++) {

        /* number of tests */
        for (i = 0; i < ntests; i++) {

          /* generate test inputs, one element per testgen call.
             NOTE(review): the 0 / 1 arguments that follow &alpha and
             &beta look like flags telling testgen to generate alpha and
             beta (0, first element) or to reuse the stored values
             (1, remaining elements) -- confirm against
             BLAS_zdot_testgen. */
          BLAS_zdot_testgen(1, 0, 1, norm, blas_no_conj,
                            &alpha, 0, &beta, 0,
                            &x_fix1, &x_gen[0], seed,
                            &y_gen[0], &head_w_true[0], &tail_w_true[0]);

          xgen_val = incx_gen;
          ygen_val = incy_gen;
          for (wgen_val = incw_gen; wgen_val < n * incw_gen;
               wgen_val += incw_gen) {
            BLAS_zdot_testgen(1, 0, 1, norm, blas_no_conj, &alpha, 1, &beta,
                              1, &x_fix1, &x_gen[xgen_val], seed,
                              &y_gen[ygen_val], &head_w_true[wgen_val],
                              &tail_w_true[wgen_val]);
            xgen_val += incx_gen;
            ygen_val += incy_gen;
          }

          count++;

          /* varying incx */
          for (incx_val = -2; incx_val <= 2; incx_val++) {
            if (incx_val == 0)
              continue;

            /* setting incx: stride in doubles */
            incx = 2 * incx_val;

            /* set x starting index */
            ix = 0;
            if (incx < 0)
              ix = -(n - 1) * incx;

            /* copy x_gen to x */
            for (j = 0; j < n * incx_gen; j += incx_gen) {
              x[ix] = x_gen[j];
              x[1 + ix] = x_gen[1 + j];
              ix += incx;
            }

            /* varying incy */
            for (incy_val = -2; incy_val <= 2; incy_val++) {
              if (incy_val == 0)
                continue;

              /* setting incy: stride in doubles */
              incy = 2 * incy_val;

              /* set y starting index */
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;

              /* copy y_gen to y */
              for (j = 0; j < n * incy_gen; j += incy_gen) {
                y[iy] = y_gen[j];
                y[1 + iy] = y_gen[1 + j];
                iy += incy;
              }

              /* varying incw */
              for (incw_val = -2; incw_val <= 2; incw_val++) {
                if (incw_val == 0)
                  continue;

                /* setting incw: stride in doubles */
                incw = 2 * incw_val;

                /* For the sake of speed, we throw out this case at random */
                if (xrand(seed) >= test_prob)
                  continue;

                /* call BLAS_zwaxpby_x to get w */
                FPU_FIX_STOP;
                BLAS_zwaxpby_x(n, alpha, x, incx_val, beta, y, incy_val,
                               w, incw_val, prec);
                FPU_FIX_START;

                /* computing the ratio */
                ix = 0;
                if (incx < 0)
                  ix = -(n - 1) * incx;
                iy = 0;
                if (incy < 0)
                  iy = -(n - 1) * incy;
                iw = 0;
                if (incw < 0)
                  iw = -(n - 1) * incw;
                ratio = 0.0;

                /* every element of w is checked as a dot product of
                   length one */
                for (test_val = 0; test_val < n * incw_gen;
                     test_val += incw_gen) {
                  test_BLAS_zdot(1, blas_no_conj, alpha, beta, &y[iy], &w[iw],
                                 &head_w_true[test_val],
                                 &tail_w_true[test_val], &x_fix1, incx,
                                 &x[ix], incx, eps_int, un_int, &new_ratio);
                  ix += incx;
                  iy += incy;
                  iw += incw;
                  /* NOTE(review): whether a NaN new_ratio survives the
                     following iterations depends on how MAX is
                     defined -- confirm. */
                  ratio = MAX(ratio, new_ratio);
                }

                /* Increase the number of bad ratio, if the ratio
                   is bigger than the threshold.
                   The !<= below causes NaN error to be detected.
                   Note that (NaN > thresh) is always false. */
                if (!(ratio <= thresh)) {
                  bad_ratios++;

                  if ((debug == 3) &&	/* print only when debug is on */
                      (count != old_count) &&	/* print if old vector is different 
                                                   from the current one */
                      (d_count == find_max_ratio) &&
                      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                    old_count = count;

                    printf
                      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                       fname, n, ntests, thresh);
                    printf("seed = %d\n", *seed);
                    printf("norm = %d\n", norm);

                    /* Print test info */
                    switch (prec) {
                    case blas_prec_single:
                      printf("single ");
                      break;
                    case blas_prec_double:
                      printf("double ");
                      break;
                    case blas_prec_indigenous:
                      printf("indigenous ");
                      break;
                    case blas_prec_extra:
                      printf("extra ");
                      break;
                    }
                    switch (norm) {
                    case -1:
                      printf("near_underflow ");
                      break;
                    case 0:
                      printf("near_one ");
                      break;
                    case 1:
                      printf("near_overflow ");
                      break;
                    }

                    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                    ix = 0;
                    iy = 0;
                    iw = 0;
                    if (incx < 0)
                      ix = -(n - 1) * incx;
                    if (incy < 0)
                      iy = -(n - 1) * incy;
                    if (incw < 0)
                      iw = -(n - 1) * incw;

                    /* one (x; y; w) triple per element, each complex
                       value printed as (real, imaginary) */
                    for (j = 0; j < n; j++) {
                      printf("      ");
                      printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
                      printf("; ");
                      printf("(%24.16e, %24.16e)", y[iy], y[iy + 1]);
                      printf("; ");
                      printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
                      printf("; ");
                      ix += incx;
                      iy += incy;
                      iw += incw;
                    }

                    printf("      ");
                    printf("alpha = ");
                    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
                    printf("; ");
                    printf("beta = ");
                    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
                    printf("\n");
                    printf("      ratio=%.4e\n", ratio);
                    p_count++;
                  }
                }

                /* the statistics are gathered in the first pass only, so
                   the second debug pass does not count anything twice */
                if (d_count == 0) {

                  if (ratio > ratio_max)
                    ratio_max = ratio;

                  if (ratio != 0.0 && ratio < ratio_min)
                    ratio_min = ratio;

                  tot_tests++;
                }
              }			/* incw */
            }			/* incy */
          }			/* incx */
        }			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* test_prob may have thrown out every case; report 0.00 then
       instead of dividing zero by zero */
    bad_fraction = 0.0;
    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_x */

double do_test_dwaxpby_d_s_x(int n,
                             int ntests,
                             int *seed,
                             double thresh,
                             int debug, float test_prob,
                             double *min_ratio,
                             int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dwaxpby_d_s_x (double x, float y,
 * double w).
 *
 * Every generated problem is a cancellation case: with X a random
 * integer in [0, 2^12 - 1],
 *     alpha = X^4 / 2^48,              x_i = X^2 / 2^24,
 *     beta  = (X^4 + X^2 + 1) / 2^48,  y_i = -(X^2 - 1) / 2^24,
 * so that alpha * x_i + beta * y_i = 2^(-72), which is the value stored
 * in head_w_true (tail_w_true is zero).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator (xrand).
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not
 *           <= thresh (this also catches a NaN ratio).  Since the ratio
 *           is supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice from the same seed: the
 *                       first pass finds the max ratio, the second prints
 *                       a FAIL record for bad cases whose ratio exceeds
 *                       0.5 * max ratio (at most one record per generated
 *                       vector pair; printing stops once more than
 *                       max_print records were written).  No summary.
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Otherwise,  print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is skipped when
 *           xrand(seed) >= test_prob.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio seen in the first pass.  When no
 *           such ratio was seen, the value is 1e308 unless the summary
 *           line was printed, in which case it is reset to 0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that fail, i.e. whose ratio is not
 *               <= thresh (counted in the last pass).
 *
 * num_tests (output) int
 *           The number of tests performed (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  If n == 0 or ntests == 0, returns 0 with all
 * outputs set to zero.
 * NOTE(review): bad arguments and allocation failures are reported through
 * BLAS_error; no path in this function returns -1.  Presumably BLAS_error
 * does not return -- confirm.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    prec loop   -- varying internal prec: double, double, or extra
 *      norm loop   -- norm = -1, 0, 1; norm is only reported in the debug
 *                  -- output, the generated inputs do not depend on it
 *        numtest loop  -- how many times the test is performed with
 *                      -- the above set of attributes; alpha, beta, x
 *                      -- and y are generated here
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_d_s_x";

  /* printing of FAIL records stops once p_count exceeds this value */
  const int max_print = 32;

  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;     /* loop vars for the inc values */
  int incx, incy, incw;
  int test_val;                 /* index into head_w_true / tail_w_true */
  int d_count;                  /* counter for the debug passes */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of FAIL records printed */
  int tot_tests;                /* total number of tests done */
  int norm;                     /* -1, 0, 1; only reported when printing */
  int X_int;
  double X;
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum non-zero ratio */
  double ratio;                 /* max ratio over the n elements of a case */
  double new_ratio;             /* ratio of a single element */
  double bad_fraction;          /* bad_ratios / tot_tests for the summary */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected */
  double un_int;                /* the internal underflow threshold */
  double x_i;
  float y_i;
  double alpha;
  double beta;
  double *x;
  float *y;
  double *w;                    /* the w computed by BLAS_dwaxpby_d_s_x */
  float x_fix2;                 /* constant 1.0 passed to the tester */

  /* x_gen and y_gen hold the generated vectors with unit stride;
     they are copied into x and y for every incx / incy */
  double *x_gen;
  float *y_gen;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w, in double-double */
  double *head_w_true, *tail_w_true;
  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count = number of generated vector pairs;
                                   old_count = the one last printed */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;

  /* get space for calculation; x, y and w hold 2 * n elements because
     the increments tested below go up to 2 in absolute value.  The size
     is computed in size_t so that the product cannot overflow an int. */
  x = (double *) blas_malloc((size_t) n * 2 * sizeof(double));
  if (n > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc((size_t) n * 2 * sizeof(float));
  if (n > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc((size_t) n * 2 * sizeof(double));
  if (n > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc((size_t) n * sizeof(double));
  tail_w_true = (double *) blas_malloc((size_t) n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc((size_t) n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc((size_t) n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix2 = 1.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* recounted on every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    /* varying extra precs */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
        /* both settings test with double internal precision */
        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;
        break;
      case 2:
      default:
        eps_int = power(2, -BITS_E);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
        prec = blas_prec_extra;
        break;
      }

      /* values near underflow, 1, or overflow (norm does not influence
         the inputs generated below) */
      for (norm = -1; norm <= 1; norm++) {

        /* number of tests */
        for (i = 0; i < ntests; i++) {

          /* generate test inputs: X is a random integer in [0, 2^12 - 1] */
          X = xrand(seed);
          X_int = (int) (X * (power(2, 12) - 1));
          X = X_int;

          alpha = X * X * X * X / power(2, 48);
          beta = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);

          x_i = X * X / power(2, 24);
          y_i = -(X * X - 1) / power(2, 24);

          /* every element gets the same x, y and true w */
          xgen_val = 0;
          ygen_val = 0;
          for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
            x_gen[xgen_val] = x_i;
            y_gen[ygen_val] = y_i;
            head_w_true[wgen_val] = 1.0 / power(2, 72);
            tail_w_true[wgen_val] = 0.0;
            xgen_val += incx_gen;
            ygen_val += incy_gen;
          }

          count++;

          /* varying incx */
          for (incx_val = -2; incx_val <= 2; incx_val++) {
            if (incx_val == 0)
              continue;

            incx = incx_val;

            /* set x starting index */
            ix = 0;
            if (incx < 0)
              ix = -(n - 1) * incx;

            /* copy x_gen to x */
            for (j = 0; j < n * incx_gen; j += incx_gen) {
              x[ix] = x_gen[j];
              ix += incx;
            }

            /* varying incy */
            for (incy_val = -2; incy_val <= 2; incy_val++) {
              if (incy_val == 0)
                continue;

              incy = incy_val;

              /* set y starting index */
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;

              /* copy y_gen to y */
              for (j = 0; j < n * incy_gen; j += incy_gen) {
                y[iy] = y_gen[j];
                iy += incy;
              }

              /* varying incw */
              for (incw_val = -2; incw_val <= 2; incw_val++) {
                if (incw_val == 0)
                  continue;

                incw = incw_val;

                /* For the sake of speed, we throw out this case at random */
                if (xrand(seed) >= test_prob)
                  continue;

                /* call BLAS_dwaxpby_d_s_x to get w */
                FPU_FIX_STOP;
                BLAS_dwaxpby_d_s_x(n, alpha, x, incx_val, beta, y, incy_val,
                                   w, incw_val, prec);
                FPU_FIX_START;

                /* computing the ratio */
                ix = 0;
                if (incx < 0)
                  ix = -(n - 1) * incx;
                iy = 0;
                if (incy < 0)
                  iy = -(n - 1) * incy;
                iw = 0;
                if (incw < 0)
                  iw = -(n - 1) * incw;
                ratio = 0.0;

                /* Each element is checked as a dot product of length 1.
                   NOTE(review): presumably the tester compares w[iw]
                   with beta * (x_fix2 * y[iy]) + alpha * x[ix] --
                   confirm against test_BLAS_ddot_s_s. */
                for (test_val = 0; test_val < n * incw_gen;
                     test_val += incw_gen) {
                  test_BLAS_ddot_s_s(1, blas_no_conj, beta, alpha, x[ix],
                                     w[iw], head_w_true[test_val],
                                     tail_w_true[test_val], &x_fix2, incy,
                                     &y[iy], incy, eps_int, un_int,
                                     &new_ratio);
                  ix += incx;
                  iy += incy;
                  iw += incw;
                  ratio = MAX(ratio, new_ratio);
                }

                /* Increase the number of bad ratio, if the ratio
                   is bigger than the threshold.
                   The !<= below causes NaN error to be detected.
                   Note that (NaN > thresh) is always false. */
                if (!(ratio <= thresh)) {
                  bad_ratios++;

                  if ((debug == 3) &&   /* print only when debug is on */
                      (count != old_count) &&   /* print if old vector is
                                                   different from the
                                                   current one */
                      (d_count == find_max_ratio) &&
                      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                    old_count = count;

                    printf
                      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                       fname, n, ntests, thresh);
                    printf("seed = %d\n", *seed);
                    printf("norm = %d\n", norm);

                    /* Print test info */
                    switch (prec) {
                    case blas_prec_single:
                      printf("single ");
                      break;
                    case blas_prec_double:
                      printf("double ");
                      break;
                    case blas_prec_indigenous:
                      printf("indigenous ");
                      break;
                    case blas_prec_extra:
                      printf("extra ");
                      break;
                    }
                    switch (norm) {
                    case -1:
                      printf("near_underflow ");
                      break;
                    case 0:
                      printf("near_one ");
                      break;
                    case 1:
                      printf("near_overflow ");
                      break;
                    }

                    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                    ix = 0;
                    iy = 0;
                    iw = 0;
                    if (incx < 0)
                      ix = -(n - 1) * incx;
                    if (incy < 0)
                      iy = -(n - 1) * incy;
                    if (incw < 0)
                      iw = -(n - 1) * incw;

                    /* one "x; y; w; " triple per element */
                    for (j = 0; j < n; j++) {
                      printf("      %24.16e; %16.8e; %24.16e; ",
                             x[ix], y[iy], w[iw]);
                      ix += incx;
                      iy += incy;
                      iw += incw;
                    }

                    printf("      alpha = %24.16e; beta = %24.16e\n",
                           alpha, beta);
                    printf("      ratio=%.4e\n", ratio);
                    p_count++;
                  }
                }

                /* statistics are gathered in the first pass only */
                if (d_count == 0) {

                  if (ratio > ratio_max)
                    ratio_max = ratio;

                  if (ratio != 0.0 && ratio < ratio_min)
                    ratio_min = ratio;

                  tot_tests++;
                }
              }                 /* incw */
            }                   /* incy */
          }                     /* incx */
        }                       /* tests */
      }                         /* norm */
    }                           /* prec */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* test_prob may have rejected every case; avoid printing 0/0 = NaN */
    bad_fraction = 0.0;
    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}                               /* end of do_test_dwaxpby_d_s_x */

double do_test_dwaxpby_s_d_x(int n,
                             int ntests,
                             int *seed,
                             double thresh,
                             int debug, float test_prob,
                             double *min_ratio,
                             int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dwaxpby_s_d_x (float x, double y,
 * double w).
 *
 * Every generated problem is a cancellation case: with X a random
 * integer in [0, 2^12 - 1],
 *     alpha = X^4 / 2^48,              x_i = X^2 / 2^24,
 *     beta  = (X^4 + X^2 + 1) / 2^48,  y_i = -(X^2 - 1) / 2^24,
 * so that alpha * x_i + beta * y_i = 2^(-72), which is the value stored
 * in head_w_true (tail_w_true is zero).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator (xrand).
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not
 *           <= thresh (this also catches a NaN ratio).  Since the ratio
 *           is supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice from the same seed: the
 *                       first pass finds the max ratio, the second prints
 *                       a FAIL record for bad cases whose ratio exceeds
 *                       0.5 * max ratio (at most one record per generated
 *                       vector pair; printing stops once more than
 *                       max_print records were written).  No summary.
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Otherwise,  print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) combination is skipped when
 *           xrand(seed) >= test_prob.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio seen in the first pass.  When no
 *           such ratio was seen, the value is 1e308 unless the summary
 *           line was printed, in which case it is reset to 0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that fail, i.e. whose ratio is not
 *               <= thresh (counted in the last pass).
 *
 * num_tests (output) int
 *           The number of tests performed (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio.  If n == 0 or ntests == 0, returns 0 with all
 * outputs set to zero.
 * NOTE(review): bad arguments and allocation failures are reported through
 * BLAS_error; no path in this function returns -1.  Presumably BLAS_error
 * does not return -- confirm.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3, once otherwise
 *    prec loop   -- varying internal prec: double, double, or extra
 *      norm loop   -- norm = -1, 0, 1; norm is only reported in the debug
 *                  -- output, the generated inputs do not depend on it
 *        numtest loop  -- how many times the test is performed with
 *                      -- the above set of attributes; alpha, beta, x
 *                      -- and y are generated here
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_s_d_x";

  /* printing of FAIL records stops once p_count exceeds this value */
  const int max_print = 32;

  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;     /* loop vars for the inc values */
  int incx, incy, incw;
  int test_val;                 /* index into head_w_true / tail_w_true */
  int d_count;                  /* counter for the debug passes */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of FAIL records printed */
  int tot_tests;                /* total number of tests done */
  int norm;                     /* -1, 0, 1; only reported when printing */
  int X_int;
  double X;
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum non-zero ratio */
  double ratio;                 /* max ratio over the n elements of a case */
  double new_ratio;             /* ratio of a single element */
  double bad_fraction;          /* bad_ratios / tot_tests for the summary */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected */
  double un_int;                /* the internal underflow threshold */
  float x_i;
  double y_i;
  double alpha;
  double beta;
  float *x;
  double *y;
  double *w;                    /* the w computed by BLAS_dwaxpby_s_d_x */
  float x_fix1;                 /* constant 1.0 passed to the tester */

  /* x_gen and y_gen hold the generated vectors with unit stride;
     they are copied into x and y for every incx / incy */
  float *x_gen;
  double *y_gen;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w, in double-double */
  double *head_w_true, *tail_w_true;
  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count = number of generated vector pairs;
                                   old_count = the one last printed */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;

  /* get space for calculation; x, y and w hold 2 * n elements because
     the increments tested below go up to 2 in absolute value.  The size
     is computed in size_t so that the product cannot overflow an int. */
  x = (float *) blas_malloc((size_t) n * 2 * sizeof(float));
  if (n > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc((size_t) n * 2 * sizeof(double));
  if (n > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc((size_t) n * 2 * sizeof(double));
  if (n > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc((size_t) n * sizeof(double));
  tail_w_true = (double *) blas_malloc((size_t) n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc((size_t) n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc((size_t) n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1 = 1.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* recounted on every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    /* varying extra precs */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
        /* both settings test with double internal precision */
        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;
        break;
      case 2:
      default:
        eps_int = power(2, -BITS_E);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
        prec = blas_prec_extra;
        break;
      }

      /* values near underflow, 1, or overflow (norm does not influence
         the inputs generated below) */
      for (norm = -1; norm <= 1; norm++) {

        /* number of tests */
        for (i = 0; i < ntests; i++) {

          /* generate test inputs: X is a random integer in [0, 2^12 - 1] */
          X = xrand(seed);
          X_int = (int) (X * (power(2, 12) - 1));
          X = X_int;

          alpha = X * X * X * X / power(2, 48);
          beta = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);

          x_i = X * X / power(2, 24);
          y_i = -(X * X - 1) / power(2, 24);

          /* every element gets the same x, y and true w */
          xgen_val = 0;
          ygen_val = 0;
          for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
            x_gen[xgen_val] = x_i;
            y_gen[ygen_val] = y_i;
            head_w_true[wgen_val] = 1.0 / power(2, 72);
            tail_w_true[wgen_val] = 0.0;
            xgen_val += incx_gen;
            ygen_val += incy_gen;
          }

          count++;

          /* varying incx */
          for (incx_val = -2; incx_val <= 2; incx_val++) {
            if (incx_val == 0)
              continue;

            incx = incx_val;

            /* set x starting index */
            ix = 0;
            if (incx < 0)
              ix = -(n - 1) * incx;

            /* copy x_gen to x */
            for (j = 0; j < n * incx_gen; j += incx_gen) {
              x[ix] = x_gen[j];
              ix += incx;
            }

            /* varying incy */
            for (incy_val = -2; incy_val <= 2; incy_val++) {
              if (incy_val == 0)
                continue;

              incy = incy_val;

              /* set y starting index */
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;

              /* copy y_gen to y */
              for (j = 0; j < n * incy_gen; j += incy_gen) {
                y[iy] = y_gen[j];
                iy += incy;
              }

              /* varying incw */
              for (incw_val = -2; incw_val <= 2; incw_val++) {
                if (incw_val == 0)
                  continue;

                incw = incw_val;

                /* For the sake of speed, we throw out this case at random */
                if (xrand(seed) >= test_prob)
                  continue;

                /* call BLAS_dwaxpby_s_d_x to get w */
                FPU_FIX_STOP;
                BLAS_dwaxpby_s_d_x(n, alpha, x, incx_val, beta, y, incy_val,
                                   w, incw_val, prec);
                FPU_FIX_START;

                /* computing the ratio */
                ix = 0;
                if (incx < 0)
                  ix = -(n - 1) * incx;
                iy = 0;
                if (incy < 0)
                  iy = -(n - 1) * incy;
                iw = 0;
                if (incw < 0)
                  iw = -(n - 1) * incw;
                ratio = 0.0;

                /* Each element is checked as a dot product of length 1.
                   NOTE(review): presumably the tester compares w[iw]
                   with alpha * (x_fix1 * x[ix]) + beta * y[iy] --
                   confirm against test_BLAS_ddot_s_s. */
                for (test_val = 0; test_val < n * incw_gen;
                     test_val += incw_gen) {
                  test_BLAS_ddot_s_s(1, blas_no_conj, alpha, beta, y[iy],
                                     w[iw], head_w_true[test_val],
                                     tail_w_true[test_val], &x_fix1, incx,
                                     &x[ix], incx, eps_int, un_int,
                                     &new_ratio);
                  ix += incx;
                  iy += incy;
                  iw += incw;
                  ratio = MAX(ratio, new_ratio);
                }

                /* Increase the number of bad ratio, if the ratio
                   is bigger than the threshold.
                   The !<= below causes NaN error to be detected.
                   Note that (NaN > thresh) is always false. */
                if (!(ratio <= thresh)) {
                  bad_ratios++;

                  if ((debug == 3) &&   /* print only when debug is on */
                      (count != old_count) &&   /* print if old vector is
                                                   different from the
                                                   current one */
                      (d_count == find_max_ratio) &&
                      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                    old_count = count;

                    printf
                      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                       fname, n, ntests, thresh);
                    printf("seed = %d\n", *seed);
                    printf("norm = %d\n", norm);

                    /* Print test info */
                    switch (prec) {
                    case blas_prec_single:
                      printf("single ");
                      break;
                    case blas_prec_double:
                      printf("double ");
                      break;
                    case blas_prec_indigenous:
                      printf("indigenous ");
                      break;
                    case blas_prec_extra:
                      printf("extra ");
                      break;
                    }
                    switch (norm) {
                    case -1:
                      printf("near_underflow ");
                      break;
                    case 0:
                      printf("near_one ");
                      break;
                    case 1:
                      printf("near_overflow ");
                      break;
                    }

                    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                    ix = 0;
                    iy = 0;
                    iw = 0;
                    if (incx < 0)
                      ix = -(n - 1) * incx;
                    if (incy < 0)
                      iy = -(n - 1) * incy;
                    if (incw < 0)
                      iw = -(n - 1) * incw;

                    /* one "x; y; w; " triple per element */
                    for (j = 0; j < n; j++) {
                      printf("      %16.8e; %24.16e; %24.16e; ",
                             x[ix], y[iy], w[iw]);
                      ix += incx;
                      iy += incy;
                      iw += incw;
                    }

                    printf("      alpha = %24.16e; beta = %24.16e\n",
                           alpha, beta);
                    printf("      ratio=%.4e\n", ratio);
                    p_count++;
                  }
                }

                /* statistics are gathered in the first pass only */
                if (d_count == 0) {

                  if (ratio > ratio_max)
                    ratio_max = ratio;

                  if (ratio != 0.0 && ratio < ratio_min)
                    ratio_min = ratio;

                  tot_tests++;
                }
              }                 /* incw */
            }                   /* incy */
          }                     /* incx */
        }                       /* tests */
      }                         /* norm */
    }                           /* prec */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* test_prob may have rejected every case; avoid printing 0/0 = NaN */
    bad_fraction = 0.0;
    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}                               /* end of do_test_dwaxpby_s_d_x */

double do_test_dwaxpby_s_s_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Test driver for BLAS_dwaxpby_s_s_x (float x, float y, double alpha, beta
 * and w).  For every combination of internal precision, norm label, test
 * repetition and stride triple (incx, incy, incw) it calls the routine on
 * generated data and checks every element of w with test_BLAS_ddot_d_s,
 * treating w[i] as the two-term dot product (alpha, beta) . (x[i], y[i]).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed passed to xrand().  Its entry value is saved and
 *           restored before the second (printing) pass when debug is 3.
 *
 * thresh    (input) double
 *           A test case counts as bad when its ratio is not <= thresh
 *           (this also catches NaN).  Since ratio is supposed to be O(1),
 *           thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass records
 *                       the maximum ratio, the second pass prints details
 *                       of failing cases whose ratio > 0.5 * max ratio
 *           If debug=2, print the summary line unconditionally
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Any other value prints nothing.
 *
 * test_prob (input) float
 *           A stride combination is run only when xrand(seed) < test_prob;
 *           otherwise it is skipped "for the sake of speed".
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           NOTE(review): if no nonzero ratio was recorded this is the
 *           sentinel 1e308, unless the summary line was printed, in which
 *           case it has been reset to 0.0.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio is not <= thresh,
 *               as counted in the last pass of the debug loop.
 *
 * num_tests (output) int
 *           The number of test cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass.  Returns 0.0 (and zeroes
 * all outputs) when n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3 (see above), once otherwise
 *    prec loop   -- prec_val 0 and 1 both select double, 2 selects extra
 *      norm loop   -- norm = -1, 0, 1; in this function norm is only
 *                  -- reported in the debug output
 *        numtest loop  -- how many times the test is performed with the
 *                      -- above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dwaxpby_s_s_x";

  /* max number of debug lines to print.
     NOTE(review): the guard below is (p_count <= max_print), so up to
     max_print + 1 failure reports can be printed. */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val,	/* for testing different inc values */
    incx, incy, incw, gen_val, test_val;
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of test cases run in the first pass */
  int norm;			/* near underflow/one/overflow label (-1/0/1) */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum (nonzero) ratio */
  double ratio;			/* max over all elements of the per-element ratio */
  double new_ratio;		/* ratio returned for one element of w */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* internal epsilon: 2^-BITS_D or 2^-BITS_E per prec_val */
  double un_int;		/* the internal underflow threshold */
  float x_i;
  float y_i;
  double alpha;
  double beta;
  float *x;
  float *y;
  double *w;			/* the w computed  by BLAS_dwaxpby_s_s_x */
  float x_fix1;			/* set below but never read in this function */
  float x_fix2;			/* set below but never read in this function */
  double zero;
  double one;
  double dummy;

  /* x_gen and y_gen hold the generated (unit stride) vectors; they are
     copied into x and y using the stride under test */
  float *x_gen;
  float *y_gen;
  double *temp_ab;		/* the pair (alpha, beta) handed to the checker */
  float *temp_xy;		/* the pair (x[i], y[i]) handed to the checker */


  /* added by DY */
  float x_genj;
  float y_genj;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;
  int iymax, ixmax;		/* assigned in the ratio loop but never read */
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the expected w (leading and trailing parts), filled in below */
  double *head_w_true, *tail_w_true;
  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: input sets generated so far;
				   old_count: value of count at the last
				   failure report */

  FPU_FIX_DECL;

  /* These are here to get rid of compiler warnings.
     Should modify M4 code to not even produce these variables when not
     needed. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;
  x_i = 0.0;
  y_i = 0.0;
  X = 0.0;
  X_int = 0;
  gen_val = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* real data: generated vectors use unit stride */
  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;




  /* get space for calculation; x, y and w get 2*n elements so that
     strides of magnitude 2 fit */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double));
  tail_w_true = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (double *) blas_malloc(2 * sizeof(double));
  if (2 > 0 && temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float));
  if (2 > 0 && temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;		/* sentinel: "no nonzero ratio seen yet" */
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1 = 1.0;
  x_fix2 = 1.0;
  zero = 0.0;
  one = 1.0;
  dummy = 0.0;;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* recounted from scratch in every pass */

    /* replay exactly the same random stream in the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */


    /* varying extra precs; cases 0 and 1 are identical (both double) */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* values near underflow, 1, or overflow (norm is only printed in
         this function; the inputs generated below do not depend on it) */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* generate test inputs: X becomes the integer part of
	     xrand(seed) * (2^12 - 1) */
	  X = xrand(seed);
	  X_int = X * (power(2, 12) - 1);
	  X = X_int;

	  /* With these values
	       alpha * x_i = X^6 / 2^72
	       beta  * y_i = -(X^6 - 1) / 2^72
	     so the two products cancel down to 2^-72, which is the value
	     stored in head_w_true below. */
	  alpha = X * X * X * X / power(2, 48);
	  beta = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);

	  x_i = X * X / power(2, 24);
	  y_i = -(X * X - 1) / power(2, 24);

	  /* every element of the generated vectors gets the same value */
	  xgen_val = 0;
	  ygen_val = 0;
	  for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	    x_gen[xgen_val] = x_i;
	    y_gen[ygen_val] = y_i;
	    head_w_true[wgen_val] = 1.0 / power(2, 72);
	    tail_w_true[wgen_val] = 0.0;
	    xgen_val += incx_gen;
	    ygen_val += incy_gen;
	  }

	  count++;


	  /* varying incx */
	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    /* setting incx */
	    incx = incx_val;


	    /* set x starting index: negative strides start at the far end */
	    ix = 0;
	    if (incx < 0)
	      ix = -(n - 1) * incx;

	    /* copy x_gen to x */
	    for (j = 0; j < n * incx_gen; j += incx_gen) {
	      x_genj = x_gen[j];
	      x[ix] = x_genj;
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      /* setting incy */
	      incy = incy_val;


	      /* set y starting index */
	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;

	      /* copy y_gen to y */
	      for (j = 0; j < n * incy_gen; j += incy_gen) {
		y_genj = y_gen[j];
		y[iy] = y_genj;
		iy += incy;
	      }

	      /* varying incw */
	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		/* setting incw */
		incw = incw_val;


		/* For the sake of speed, we throw out this case at random.
		   This draw consumes the random stream, so it must stay in
		   this position for the debug=3 replay to line up. */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_dwaxpby_s_s_x to get w; the FPU_FIX macros are
		   stopped around the call and restarted afterwards */
		FPU_FIX_STOP;
		BLAS_dwaxpby_s_s_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* computing the ratio */
		ix = 0;
		if (incx < 0)
		  ix = -(n - 1) * incx;
		iy = 0;
		if (incy < 0)
		  iy = -(n - 1) * incy;
		iw = 0;
		if (incw < 0)
		  iw = -(n - 1) * incw;
		ratio = 0.0;

		temp_ab[0] = alpha;
		temp_ab[incw_gen] = beta;

		/* check each w element as the length-2 dot product
		   temp_ab . temp_xy and keep the worst ratio */
		for (test_val = 0; test_val < n * incw_gen;
		     test_val += incw_gen) {
		  x_genj = x[ix];
		  temp_xy[0] = x_genj;

		  y_genj = y[iy];
		  temp_xy[incy_gen] = y_genj;

		  test_BLAS_ddot_d_s(2, blas_no_conj, one, zero,
				     dummy, w[iw],
				     head_w_true[test_val],
				     tail_w_true[test_val], temp_ab, 1,
				     temp_xy, 1, eps_int, un_int, &new_ratio);
		  if (MAX(ratio, new_ratio) == new_ratio) {
		    iymax = iy;
		    ixmax = ix;
		  }
		  ratio = MAX(ratio, new_ratio);

		  ix += incx;
		  iy += incy;
		  iw += incw;
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  if ((debug == 3) &&	/* print only when debug is on */
		      (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    /* dump x[i]; y[i]; w[i] for every element (no newline
		       is emitted between elements) */
		    ix = 0;
		    iy = 0;
		    iw = 0;
		    if (incx < 0)
		      ix = -(n - 1) * incx;
		    if (incy < 0)
		      iy = -(n - 1) * incy;
		    if (incw < 0)
		      iw = -(n - 1) * incw;

		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("%16.8e", x[ix]);
		      printf("; ");
		      printf("%16.8e", y[iy]);
		      printf("; ");
		      printf("%24.16e", w[iw]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("%24.16e", alpha);
		    printf("; ");
		    printf("beta = ");
		    printf("%24.16e", beta);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}
		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  /* summary line.
     NOTE(review): tot_tests is 0 when every case was skipped by the
     test_prob filter; the bad/total quotient is then 0.0 / 0.0. */
  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    /* replace the "nothing recorded" sentinel by 0.0 (only done here) */
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_dwaxpby_s_s_x */

double do_test_zwaxpby_z_c_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Test driver for BLAS_zwaxpby_z_c_x (x, alpha, beta and w stored as
 * pairs of doubles, y stored as pairs of floats).  For every combination
 * of internal precision, norm label, test repetition and stride triple
 * (incx, incy, incw) it calls the routine on generated data and checks
 * every element of w with test_BLAS_zdot_c_c.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed passed to xrand().  Its entry value is saved and
 *           restored before the second (printing) pass when debug is 3.
 *
 * thresh    (input) double
 *           A test case counts as bad when its ratio is not <= thresh
 *           (this also catches NaN).  Since ratio is supposed to be O(1),
 *           thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass records
 *                       the maximum ratio, the second pass prints details
 *                       of failing cases whose ratio > 0.5 * max ratio
 *           If debug=2, print the summary line unconditionally
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Any other value prints nothing.
 *
 * test_prob (input) float
 *           A stride combination is run only when xrand(seed) < test_prob;
 *           otherwise it is skipped "for the sake of speed".
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           NOTE(review): if no nonzero ratio was recorded this is the
 *           sentinel 1e308, unless the summary line was printed, in which
 *           case it has been reset to 0.0.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio is not <= thresh,
 *               as counted in the last pass of the debug loop.
 *
 * num_tests (output) int
 *           The number of test cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass.  Returns 0.0 (and zeroes
 * all outputs) when n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3 (see above), once otherwise
 *    prec loop   -- prec_val 0 and 1 both select double, 2 selects extra
 *      norm loop   -- norm = -1, 0, 1; in this function norm is only
 *                  -- reported in the debug output
 *        numtest loop  -- how many times the test is performed with the
 *                      -- above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zwaxpby_z_c_x";

  /* max number of debug lines to print.
     NOTE(review): the guard below is (p_count <= max_print), so up to
     max_print + 1 failure reports can be printed. */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val,	/* for testing different inc values */
    incx, incy, incw, gen_val, test_val;
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of test cases run in the first pass */
  int norm;			/* near underflow/one/overflow label (-1/0/1) */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum (nonzero) ratio */
  double ratio;			/* max over all elements of the per-element ratio */
  double new_ratio;		/* ratio returned for one element of w */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* internal epsilon: 2^-BITS_D or 2^-BITS_E per prec_val */
  double un_int;		/* the internal underflow threshold */
  double x_i[2];		/* complex values are (real, imag) pairs */
  float y_i[2];
  double alpha[2];
  double beta[2];
  double *x;
  float *y;
  double *w;			/* the w computed  by BLAS_zwaxpby_z_c_x */
  double x_fix1[2];		/* set below but never read in this function */
  float x_fix2[2];		/* the constant (1, 0) passed to the checker */
  double zero[2];
  double one[2];
  double dummy[2];

  /* x_gen and y_gen hold the generated vectors; they are copied into x
     and y using the stride under test */
  double *x_gen;
  float *y_gen;
  double *temp_ab;		/* allocated and freed, otherwise unused here */
  double *temp_xy;		/* allocated and freed, otherwise unused here */


  /* added by DY */
  double x_genj[2];
  float y_genj[2];
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;
  int iymax, ixmax;		/* assigned in the ratio loop but never read */
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the expected w (leading and trailing parts), filled in below */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: input sets generated so far;
				   old_count: value of count at the last
				   failure report */

  FPU_FIX_DECL;

  /* These are here to get rid of compiler warnings.
     Should modify M4 code to not even produce these variables when not
     needed. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;
  x_i[0] = x_i[1] = 0.0;
  y_i[0] = y_i[1] = 0.0;
  X = 0.0;
  X_int = 0;
  gen_val = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* strides of the generated vectors, doubled because each complex
     element occupies two consecutive array slots */
  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;
  incw_gen *= 2;
  incx_gen *= 2;
  incy_gen *= 2;

  /* get space for calculation; x, y and w get 2*n complex elements so
     that strides of magnitude 2 fit */
  x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (2 > 0 && temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (2 > 0 && temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;		/* sentinel: "no nonzero ratio seen yet" */
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;
  x_fix2[0] = 1.0;
  x_fix2[1] = 0.0;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* recounted from scratch in every pass */

    /* replay exactly the same random stream in the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */


    /* varying extra precs; cases 0 and 1 are identical (both double) */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* values near underflow, 1, or overflow (norm is only printed in
         this function; the inputs generated below do not depend on it) */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* generate test inputs: X becomes the integer part of
	     xrand(seed) * (2^12 - 1) */
	  X = xrand(seed);
	  X_int = X * (power(2, 12) - 1);
	  X = X_int;

	  /* alpha and beta have equal real and imaginary parts; x_i and
	     y_i are purely imaginary.  With these values
	       alpha * x_i = (X^6 / 2^72) * (-1 + i)
	       beta  * y_i = -((X^6 - 1) / 2^72) * (-1 + i)
	     so the two products cancel down to (-2^-72, 2^-72), which is
	     the value stored in head_w_true below. */
	  alpha[0] = X * X * X * X / power(2, 48);
	  alpha[1] = X * X * X * X / power(2, 48);
	  x_i[0] = 0.0;
	  x_i[1] = X * X / power(2, 24);

	  beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  y_i[0] = 0.0;
	  y_i[1] = -(X * X - 1) / power(2, 24);


	  /* every element of the generated vectors gets the same value */
	  xgen_val = 0;
	  ygen_val = 0;
	  for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	    x_gen[xgen_val] = x_i[0];
	    x_gen[1 + xgen_val] = x_i[1];
	    y_gen[ygen_val] = y_i[0];
	    y_gen[1 + ygen_val] = y_i[1];
	    head_w_true[wgen_val] = -1.0 / power(2, 72);
	    head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
	    tail_w_true[wgen_val] = 0.0;
	    tail_w_true[wgen_val + 1] = 0.0;
	    xgen_val += incx_gen;
	    ygen_val += incy_gen;
	  }

	  count++;


	  /* varying incx */
	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    /* setting incx: incx_val is what the routine receives, incx
	       is the same stride counted in array slots (two per element) */
	    incx = incx_val;
	    incx *= 2;

	    /* set x starting index: negative strides start at the far end */
	    ix = 0;
	    if (incx < 0)
	      ix = -(n - 1) * incx;

	    /* copy x_gen to x */
	    for (j = 0; j < n * incx_gen; j += incx_gen) {
	      x_genj[0] = x_gen[j];
	      x_genj[1] = x_gen[1 + j];
	      x[ix] = x_genj[0];
	      x[1 + ix] = x_genj[1];
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      /* setting incy (doubled for array indexing, as for incx) */
	      incy = incy_val;
	      incy *= 2;

	      /* set y starting index */
	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;

	      /* copy y_gen to y */
	      for (j = 0; j < n * incy_gen; j += incy_gen) {
		y_genj[0] = y_gen[j];
		y_genj[1] = y_gen[1 + j];
		y[iy] = y_genj[0];
		y[1 + iy] = y_genj[1];
		iy += incy;
	      }

	      /* varying incw */
	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		/* setting incw (doubled for array indexing, as for incx) */
		incw = incw_val;
		incw *= 2;

		/* For the sake of speed, we throw out this case at random.
		   This draw consumes the random stream, so it must stay in
		   this position for the debug=3 replay to line up. */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_zwaxpby_z_c_x to get w; the FPU_FIX macros are
		   stopped around the call and restarted afterwards */
		FPU_FIX_STOP;
		BLAS_zwaxpby_z_c_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* computing the ratio */
		ix = 0;
		if (incx < 0)
		  ix = -(n - 1) * incx;
		iy = 0;
		if (incy < 0)
		  iy = -(n - 1) * incy;
		iw = 0;
		if (incw < 0)
		  iw = -(n - 1) * incw;
		ratio = 0.0;

		/* check each w element with a length-1 dot test: beta and
		   alpha are passed in that order, x[ix] and w[iw] follow,
		   and the vector operands are x_fix2 = (1, 0) and y[iy].
		   Presumably this verifies w = beta * (1 * y) + alpha * x
		   -- confirm against test_BLAS_zdot_c_c. */
		for (test_val = 0; test_val < n * incw_gen;
		     test_val += incw_gen) {
		  test_BLAS_zdot_c_c(1, blas_no_conj, beta, alpha, &x[ix],
				     &w[iw], &head_w_true[test_val],
				     &tail_w_true[test_val], &x_fix2, incy,
				     &y[iy], incy, eps_int, un_int,
				     &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;
		  if (MAX(ratio, new_ratio) == new_ratio) {
		    iymax = iy - incy;
		    ixmax = ix - incx;
		  }
		  ratio = MAX(ratio, new_ratio);
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  if ((debug == 3) &&	/* print only when debug is on */
		      (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    /* note: the doubled (array-slot) strides are printed */
		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    /* dump x[i]; y[i]; w[i] for every element (no newline
		       is emitted between elements) */
		    ix = 0;
		    iy = 0;
		    iw = 0;
		    if (incx < 0)
		      ix = -(n - 1) * incx;
		    if (incy < 0)
		      iy = -(n - 1) * incy;
		    if (incw < 0)
		      iw = -(n - 1) * incw;

		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
		      printf("; ");
		      printf("(%16.8e, %16.8e)", y[iy], y[iy + 1]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}
		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  /* summary line.
     NOTE(review): tot_tests is 0 when every case was skipped by the
     test_prob filter; the bad/total quotient is then 0.0 / 0.0. */
  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    /* replace the "nothing recorded" sentinel by 0.0 (only done here) */
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_z_c_x */

double do_test_zwaxpby_c_z_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_c_z_x.  x is held as float
 * (re, im) pairs, y and w as double (re, im) pairs.  Every element of the
 * computed w is scored with test_BLAS_zdot_c_c; the largest per-element
 * ratio becomes the ratio of that test case.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           Number of elements in each vector.
 *
 * ntests    (input) int
 *           Number of input sets generated per (prec, norm) combination.
 *
 * seed      (input/output) int
 *           Seed handed to xrand().  When debug == 3 it is rewound to its
 *           entry value before the second pass.
 *
 * thresh    (input) double
 *           A test case is counted as bad when !(ratio <= thresh), so a
 *           NaN ratio is counted as bad as well.
 *
 * debug     (input) int
 *           3: run everything twice; the second pass prints the inputs of
 *              failing cases whose ratio exceeds half of the maximum
 *              ratio found by the first pass (at most one report per
 *              generated input set, and only while the report counter is
 *              <= max_print).
 *           2: print the summary block.
 *           1: print the summary block only if some ratio was bad.
 *           other values: print nothing.
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is skipped whenever
 *           xrand(seed) >= test_prob.
 *
 * min_ratio (output) double
 *           Smallest nonzero ratio recorded in the first pass.  If none
 *           was recorded it holds 1e308, except that the summary path
 *           replaces that sentinel by 0.0.
 *
 * num_bad_ratio (output) int
 *           Number of bad cases counted in the last pass executed.
 *
 * num_tests (output) int
 *           Number of cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass.  If n == 0 or
 * ntests == 0 nothing is run, the three outputs are zeroed and 0.0 is
 * returned.
 *
 * Code structure
 * ==============
 *
 *  pass loop   -- one pass, or two when debug == 3
 *    prec loop   -- prec_val 0 and 1 both select blas_prec_double,
 *                -- prec_val 2 selects blas_prec_extra
 *      norm loop   -- -1, 0, 1; in this routine norm is only reported in
 *                  -- the failure output
 *        test loop   -- ntests generated input sets
 *          incx loop   -- -2, -1, 1, 2
 *            incy loop   -- -2, -1, 1, 2
 *              incw loop   -- -2, -1, 1, 2
 */
{
  /* name reported in diagnostics */
  const char fname[] = "BLAS_zwaxpby_c_z_x";

  /* failure reports are printed while the report counter is <= this */
  const int max_print = 32;

  /* distance between consecutive complex elements in the generated data */
  const int gen_inc = 2;

  int rep;			/* index of the generated input set */
  int k;			/* element counter */
  int g;			/* offset of element k in the generated data */
  int ix, iy, iw;		/* running offsets into x, y, w */
  int incx_val, incy_val, incw_val;	/* increments given to the routine */
  int incx, incy, incw;		/* the same increments in real-number units */
  int pass;			/* current debug pass */
  int last_pass;		/* 1 only if debug == 3, otherwise 0 */
  int printed;			/* number of failure reports printed */
  int tot_tests;		/* cases run during the first pass */
  int bad_ratios;		/* cases over the threshold in this pass */
  int norm;
  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* value of *seed on entry */
  int count, old_count;		/* id of the current / last reported input set */
  int X_int;
  double X;
  double ratio;			/* ratio of the current case */
  double new_ratio;		/* ratio of a single element */
  double ratio_max;
  double ratio_min;
  double eps_int;		/* internal epsilon for the current prec */
  double un_int;		/* internal underflow threshold */
  float x_i[2];			/* value given to every element of x */
  double y_i[2];		/* value given to every element of y */
  double alpha[2];
  double beta[2];
  float x_fix1[2];		/* the constant (1, 0) used by the checker */
  float *x;
  double *y;
  double *w;			/* the w computed by BLAS_zwaxpby_c_z_x */
  float *x_gen;			/* generated x, unit stride */
  double *y_gen;		/* generated y, unit stride */
  double *head_w_true, *tail_w_true;	/* reference w, leading/trailing part */
  double *temp_ab;
  float *temp_xy;

  FPU_FIX_DECL;

  /* argument checks */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* nothing to do: report zeros */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* work space; x, y and w are sized for an increment of magnitude 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  /* the two scratch buffers are not referenced by this variant, but they
     are still allocated here and released at the end */
  temp_ab = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* bookkeeping */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  printed = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  last_pass = (debug == 3) ? 1 : 0;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;

  /* With debug == 3 the body runs twice: pass 0 finds the maximum ratio,
     pass 1 replays the same random sequence and prints the failures. */
  for (pass = 0; pass <= last_pass; pass++) {
    bad_ratios = 0;

    if (debug == 3 && pass == last_pass)
      *seed = saved_seed;	/* replay from the original seed */

    /* internal precision */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      if (prec_val < 2) {
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
      } else {
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
      }

      for (norm = -1; norm <= 1; norm++) {

	for (rep = 0; rep < ntests; rep++) {

	  /* draw one integer X and derive all inputs from it */
	  X = xrand(seed);
	  X_int = X * (power(2, 12) - 1);
	  X = X_int;

	  alpha[0] = X * X * X * X / power(2, 48);
	  alpha[1] = alpha[0];
	  x_i[0] = 0.0;
	  x_i[1] = X * X / power(2, 24);

	  beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  beta[1] = beta[0];
	  y_i[0] = 0.0;
	  y_i[1] = -(X * X - 1) / power(2, 24);

	  /* every element gets the same x, y and reference w */
	  for (k = 0; k < n; k++) {
	    g = k * gen_inc;
	    x_gen[g] = x_i[0];
	    x_gen[g + 1] = x_i[1];
	    y_gen[g] = y_i[0];
	    y_gen[g + 1] = y_i[1];
	    head_w_true[g] = -1.0 / power(2, 72);
	    head_w_true[g + 1] = 1.0 / power(2, 72);
	    tail_w_true[g] = 0.0;
	    tail_w_true[g + 1] = 0.0;
	  }

	  count++;

	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    incx = 2 * incx_val;

	    /* scatter x_gen into x with stride incx */
	    ix = (incx < 0) ? -(n - 1) * incx : 0;
	    for (k = 0; k < n; k++) {
	      g = k * gen_inc;
	      x[ix] = x_gen[g];
	      x[ix + 1] = x_gen[g + 1];
	      ix += incx;
	    }

	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      incy = 2 * incy_val;

	      /* scatter y_gen into y with stride incy */
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      for (k = 0; k < n; k++) {
		g = k * gen_inc;
		y[iy] = y_gen[g];
		y[iy + 1] = y_gen[g + 1];
		iy += incy;
	      }

	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		incw = 2 * incw_val;

		/* randomly drop cases to keep the run time down */
		if (xrand(seed) >= test_prob)
		  continue;

		/* routine under test */
		FPU_FIX_STOP;
		BLAS_zwaxpby_c_z_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* score each element; the case ratio is the largest one */
		ix = (incx < 0) ? -(n - 1) * incx : 0;
		iy = (incy < 0) ? -(n - 1) * incy : 0;
		iw = (incw < 0) ? -(n - 1) * incw : 0;
		ratio = 0.0;

		for (k = 0; k < n; k++) {
		  g = k * gen_inc;
		  test_BLAS_zdot_c_c(1, blas_no_conj, alpha, beta, &y[iy],
				     &w[iw], &head_w_true[g],
				     &tail_w_true[g], &x_fix1, incx,
				     &x[ix], incx, eps_int, un_int,
				     &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;
		  ratio = MAX(ratio, new_ratio);
		}

		/* The negated <= makes a NaN ratio count as a failure,
		   since every comparison with NaN is false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  if (debug == 3 && count != old_count && pass == last_pass
		      && printed <= max_print && ratio > 0.5 * ratio_max) {
		    /* separate offsets for walking the vectors while printing */
		    int px = (incx < 0) ? -(n - 1) * incx : 0;
		    int py = (incy < 0) ? -(n - 1) * incy : 0;
		    int pw = (incw < 0) ? -(n - 1) * incw : 0;

		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }

		    if (norm == -1)
		      printf("near_underflow ");
		    else if (norm == 0)
		      printf("near_one ");
		    else if (norm == 1)
		      printf("near_overflow ");

		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    /* one "x; y; w; " triple per element */
		    for (k = 0; k < n; k++) {
		      printf("      ");
		      printf("(%16.8e, %16.8e)", x[px], x[px + 1]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", y[py], y[py + 1]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", w[pw], w[pw + 1]);
		      printf("; ");
		      px += incx;
		      py += incy;
		      pw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    printed++;
		  }
		}

		/* statistics are gathered during the first pass only */
		if (pass == 0) {
		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* pass */

  /* summary */
  if (debug == 2 || (debug == 1 && bad_ratios > 0)) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    /* no nonzero ratio was recorded: show 0 instead of the sentinel */
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_c_z_x */

double do_test_zwaxpby_c_c_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_c_c_x.  x and y are held as
 * float (re, im) pairs, w as double (re, im) pairs.  Every element of the
 * computed w is scored with test_BLAS_zdot_z_c, called on the length-2
 * vectors (alpha, beta) and (x element, y element); the largest
 * per-element ratio becomes the ratio of that test case.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           Number of elements in each vector.
 *
 * ntests    (input) int
 *           Number of input sets generated per (prec, norm) combination.
 *
 * seed      (input/output) int
 *           Seed handed to xrand().  When debug == 3 it is rewound to its
 *           entry value before the second pass.
 *
 * thresh    (input) double
 *           A test case is counted as bad when !(ratio <= thresh), so a
 *           NaN ratio is counted as bad as well.
 *
 * debug     (input) int
 *           3: run everything twice; the second pass prints the inputs of
 *              failing cases whose ratio exceeds half of the maximum
 *              ratio found by the first pass (at most one report per
 *              generated input set, and only while the report counter is
 *              <= max_print).
 *           2: print the summary block.
 *           1: print the summary block only if some ratio was bad.
 *           other values: print nothing.
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is skipped whenever
 *           xrand(seed) >= test_prob.
 *
 * min_ratio (output) double
 *           Smallest nonzero ratio recorded in the first pass.  If none
 *           was recorded it holds 1e308, except that the summary path
 *           replaces that sentinel by 0.0.
 *
 * num_bad_ratio (output) int
 *           Number of bad cases counted in the last pass executed.
 *
 * num_tests (output) int
 *           Number of cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass.  If n == 0 or
 * ntests == 0 nothing is run, the three outputs are zeroed and 0.0 is
 * returned.
 *
 * Code structure
 * ==============
 *
 *  pass loop   -- one pass, or two when debug == 3
 *    prec loop   -- prec_val 0 and 1 both select blas_prec_double,
 *                -- prec_val 2 selects blas_prec_extra
 *      norm loop   -- -1, 0, 1; in this routine norm is only reported in
 *                  -- the failure output
 *        test loop   -- ntests generated input sets
 *          incx loop   -- -2, -1, 1, 2
 *            incy loop   -- -2, -1, 1, 2
 *              incw loop   -- -2, -1, 1, 2
 */
{
  /* name reported in diagnostics */
  const char fname[] = "BLAS_zwaxpby_c_c_x";

  /* failure reports are printed while the report counter is <= this */
  const int max_print = 32;

  /* distance between consecutive complex elements in the generated data
     and in the two scratch buffers */
  const int gen_inc = 2;

  int rep;			/* index of the generated input set */
  int k;			/* element counter */
  int g;			/* offset of element k in the generated data */
  int ix, iy, iw;		/* running offsets into x, y, w */
  int incx_val, incy_val, incw_val;	/* increments given to the routine */
  int incx, incy, incw;		/* the same increments in real-number units */
  int pass;			/* current debug pass */
  int last_pass;		/* 1 only if debug == 3, otherwise 0 */
  int printed;			/* number of failure reports printed */
  int tot_tests;		/* cases run during the first pass */
  int bad_ratios;		/* cases over the threshold in this pass */
  int norm;
  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* value of *seed on entry */
  int count, old_count;		/* id of the current / last reported input set */
  int X_int;
  double X;
  double ratio;			/* ratio of the current case */
  double new_ratio;		/* ratio of a single element */
  double ratio_max;
  double ratio_min;
  double eps_int;		/* internal epsilon for the current prec */
  double un_int;		/* internal underflow threshold */
  float x_i[2];			/* value given to every element of x */
  float y_i[2];			/* value given to every element of y */
  double alpha[2];
  double beta[2];
  double zero[2];		/* (0, 0), passed to the checker as beta */
  double one[2];		/* (1, 0), passed to the checker as alpha */
  double dummy[2];		/* (0, 0), passed to the checker as r_in */
  float *x;
  float *y;
  double *w;			/* the w computed by BLAS_zwaxpby_c_c_x */
  float *x_gen;			/* generated x, unit stride */
  float *y_gen;			/* generated y, unit stride */
  double *head_w_true, *tail_w_true;	/* reference w, leading/trailing part */
  double *temp_ab;		/* the pair (alpha, beta) */
  float *temp_xy;		/* the pair (x element, y element) */

  FPU_FIX_DECL;

  /* argument checks */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* nothing to do: report zeros */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* work space; x, y and w are sized for an increment of magnitude 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* bookkeeping */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  printed = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  last_pass = (debug == 3) ? 1 : 0;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;

  /* With debug == 3 the body runs twice: pass 0 finds the maximum ratio,
     pass 1 replays the same random sequence and prints the failures. */
  for (pass = 0; pass <= last_pass; pass++) {
    bad_ratios = 0;

    if (debug == 3 && pass == last_pass)
      *seed = saved_seed;	/* replay from the original seed */

    /* internal precision */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      if (prec_val < 2) {
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
      } else {
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
      }

      for (norm = -1; norm <= 1; norm++) {

	for (rep = 0; rep < ntests; rep++) {

	  /* draw one integer X and derive all inputs from it */
	  X = xrand(seed);
	  X_int = X * (power(2, 12) - 1);
	  X = X_int;

	  alpha[0] = X * X * X * X / power(2, 48);
	  alpha[1] = alpha[0];
	  x_i[0] = 0.0;
	  x_i[1] = X * X / power(2, 24);

	  beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  beta[1] = beta[0];
	  y_i[0] = 0.0;
	  y_i[1] = -(X * X - 1) / power(2, 24);

	  /* every element gets the same x, y and reference w */
	  for (k = 0; k < n; k++) {
	    g = k * gen_inc;
	    x_gen[g] = x_i[0];
	    x_gen[g + 1] = x_i[1];
	    y_gen[g] = y_i[0];
	    y_gen[g + 1] = y_i[1];
	    head_w_true[g] = -1.0 / power(2, 72);
	    head_w_true[g + 1] = 1.0 / power(2, 72);
	    tail_w_true[g] = 0.0;
	    tail_w_true[g + 1] = 0.0;
	  }

	  count++;

	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    incx = 2 * incx_val;

	    /* scatter x_gen into x with stride incx */
	    ix = (incx < 0) ? -(n - 1) * incx : 0;
	    for (k = 0; k < n; k++) {
	      g = k * gen_inc;
	      x[ix] = x_gen[g];
	      x[ix + 1] = x_gen[g + 1];
	      ix += incx;
	    }

	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      incy = 2 * incy_val;

	      /* scatter y_gen into y with stride incy */
	      iy = (incy < 0) ? -(n - 1) * incy : 0;
	      for (k = 0; k < n; k++) {
		g = k * gen_inc;
		y[iy] = y_gen[g];
		y[iy + 1] = y_gen[g + 1];
		iy += incy;
	      }

	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		incw = 2 * incw_val;

		/* randomly drop cases to keep the run time down */
		if (xrand(seed) >= test_prob)
		  continue;

		/* routine under test */
		FPU_FIX_STOP;
		BLAS_zwaxpby_c_c_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* score each element; the case ratio is the largest one */
		ix = (incx < 0) ? -(n - 1) * incx : 0;
		iy = (incy < 0) ? -(n - 1) * incy : 0;
		iw = (incw < 0) ? -(n - 1) * incw : 0;
		ratio = 0.0;

		/* first operand of the length-2 dot product: (alpha, beta) */
		temp_ab[0] = alpha[0];
		temp_ab[1] = alpha[1];
		temp_ab[gen_inc] = beta[0];
		temp_ab[gen_inc + 1] = beta[1];

		for (k = 0; k < n; k++) {
		  g = k * gen_inc;

		  /* second operand: (x element, y element) */
		  temp_xy[0] = x[ix];
		  temp_xy[1] = x[ix + 1];
		  temp_xy[gen_inc] = y[iy];
		  temp_xy[gen_inc + 1] = y[iy + 1];

		  test_BLAS_zdot_z_c(2, blas_no_conj, one, zero,
				     dummy, &w[iw],
				     &head_w_true[g],
				     &tail_w_true[g], &temp_ab[0], 1,
				     &temp_xy[0], 1, eps_int, un_int,
				     &new_ratio);
		  ratio = MAX(ratio, new_ratio);

		  ix += incx;
		  iy += incy;
		  iw += incw;
		}

		/* The negated <= makes a NaN ratio count as a failure,
		   since every comparison with NaN is false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  if (debug == 3 && count != old_count && pass == last_pass
		      && printed <= max_print && ratio > 0.5 * ratio_max) {
		    /* separate offsets for walking the vectors while printing */
		    int px = (incx < 0) ? -(n - 1) * incx : 0;
		    int py = (incy < 0) ? -(n - 1) * incy : 0;
		    int pw = (incw < 0) ? -(n - 1) * incw : 0;

		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }

		    if (norm == -1)
		      printf("near_underflow ");
		    else if (norm == 0)
		      printf("near_one ");
		    else if (norm == 1)
		      printf("near_overflow ");

		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    /* one "x; y; w; " triple per element */
		    for (k = 0; k < n; k++) {
		      printf("      ");
		      printf("(%16.8e, %16.8e)", x[px], x[px + 1]);
		      printf("; ");
		      printf("(%16.8e, %16.8e)", y[py], y[py + 1]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", w[pw], w[pw + 1]);
		      printf("; ");
		      px += incx;
		      py += incy;
		      pw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    printed++;
		  }
		}

		/* statistics are gathered during the first pass only */
		if (pass == 0) {
		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* pass */

  /* summary */
  if (debug == 2 || (debug == 1 && bad_ratios > 0)) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    /* no nonzero ratio was recorded: show 0 instead of the sentinel */
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_c_c_x */

double do_test_cwaxpby_c_s_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_c_s_x.  Inputs are generated
 * with BLAS_sdot_testgen, the routine under test is called for every
 * combination of internal precision, norm and incx/incy/incw, and each
 * element of the computed w is checked with test_BLAS_cdot_s_s.
 * In this variant x, w, alpha and beta occupy two floats per element
 * (presumably real and imaginary parts) and y occupies one float.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested.
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is handed to
 *           BLAS_sdot_testgen() and to xrand().
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (so a NaN ratio is counted as bad too).  Since ratio is
 *           supposed to be O(1), we can set thresh to ~10.
 *
 * debug     (input) int
 *           As implemented below:
 *           If debug=3, run every case twice; the second pass prints
 *                       full information about failing cases
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of
 *                       bad ratios > 0
 *           Any other value prints nothing.
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is executed only when
 *           xrand(seed) < test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  If no
 *           nonzero ratio was recorded the value stays at 1e308, unless
 *           the summary line is printed, in which case it is reset to 0.
 *           Set to 0 when n == 0 or ntests == 0.
 *
 * num_bad_ratio (output) int
 *               The number of cases whose ratio was not <= thresh,
 *               counted during the last pass of the debug loop.
 *
 * num_tests (output) int
 *           The number of cases actually executed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass; 0.0 when n == 0 or
 * ntests == 0.  No path in this function returns -1.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first pass computes the max ratio
 *              -- and the second (last) pass outputs debugging
 *              -- information if the test fails and its ratio is
 *              -- > 0.5 * max ratio.
 *              -- for any other debug value the loop is executed once
 *    prec loop   -- varying internal prec: single, double, or extra
 *      norm loop   -- varying norm: near underflow, near one, or
 *                    -- near overflow
 *        numtest loop  -- how many times the test is performed with
 *                        -- above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cwaxpby_c_s_x";

  /* max number of failure reports to print.  Note that the check below
     is (p_count <= max_print), so up to max_print + 1 are printed. */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val,	/* for testing different inc values */
    incx, incy, incw, gen_val, test_val;
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of cases executed in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  int X_int;			/* only assigned, never read here */
  double X;			/* only assigned, never read here */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double ratio;			/* the per-use test ratio from test() */
  double new_ratio;		/* ratio of the element just checked */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  float x_i[2];			/* only assigned, never read here */
  float y_i;			/* only assigned, never read here */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;			/* the w computed  by BLAS_cwaxpby_c_s_x */
  float x_fix1[2];		/* only assigned, never read here */
  float x_fix2;			/* constant 1.0 passed to test_BLAS_cdot_s_s */
  float zero[2];		/* only assigned, never read here */
  float one[2];			/* only assigned, never read here */
  float dummy[2];		/* only assigned, never read here */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *x_gen;
  float *y_gen;
  /* temp_ab and temp_xy are allocated and freed but not otherwise used
     in this function */
  float *temp_ab;
  float *temp_xy;


  /* added by DY */
  float x_genj[2];
  float y_genj;
  int incy_gen, incx_gen, incw_gen;	/* strides (in floats/doubles) of the
					   generated, unit-increment data */
  int xgen_val, ygen_val, wgen_val;
  int iymax, ixmax;		/* index of the worst element; written but
				   never read in this function */
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is incremented once per generated
				   input set; old_count is the value of count
				   at the last printed failure */

  /* FPU_FIX_DECL / FPU_FIX_START / FPU_FIX_STOP are macros defined outside
     this file chunk; presumably they save/alter/restore FPU state. */
  FPU_FIX_DECL;

  /* These are here to get rid of compiler warnings.
     Should modify M4 code to not even produce these variables when not
     needed. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;
  x_i[0] = x_i[1] = 0.0;
  y_i = 0.0;
  X = 0.0;
  X_int = 0;
  gen_val = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero
     (this happens before FPU_FIX_START, so no FPU_FIX_STOP is needed) */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* Strides of the generated data: x_gen and the true w use two entries
     per element, y_gen uses one. */
  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;
  incw_gen *= 2;
  incx_gen *= 2;


  /* get space for calculation.
     x and w hold n elements of two floats each at |inc| up to 2
     (4 * n floats); y holds n single floats at |inc| up to 2
     (2 * n floats). */
  x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (2 > 0 && temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (2 > 0 && temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;		/* sentinel: no nonzero ratio seen yet */
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1[0] = 1.0;
  x_fix1[1] = 0.0;
  x_fix2 = 1.0;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass starts again from the seed used by the first pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */


    /* varying extra precs: select the epsilon and underflow threshold
       matching the internal precision requested from the routine */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;
	break;
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* values near underflow, 1, or overflow */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* generate test inputs.
	     The first testgen call also produces atemp and btemp, from
	     which alpha and beta are built.  The scalar outputs are placed
	     into the two-float values as follows:
	     x = (0, xtemp), alpha = (atemp, atemp), y = ytemp,
	     beta = (-btemp, btemp), true w = (-wltemp, wltemp) with a
	     zero tail. */
	  x_fix1_temp = 1.0;
	  BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
			    &atemp, 0, &btemp, 0,
			    &x_fix1_temp, &xtemp, seed,
			    &ytemp, &wltemp, &wttemp);
	  x_gen[0] = 0.0;
	  x_gen[1] = xtemp;
	  alpha[0] = atemp;
	  alpha[1] = atemp;


	  y_gen[0] = ytemp;
	  beta[0] = -btemp;
	  beta[1] = btemp;

	  head_w_true[0] = -wltemp;
	  head_w_true[1] = wltemp;
	  tail_w_true[0] = 0.0;
	  tail_w_true[1] = 0.0;

	  /* remaining n - 1 elements; these calls pass 1 instead of 0
	     after &atemp and &btemp (NOTE(review): presumably this keeps
	     alpha and beta fixed -- confirm against BLAS_sdot_testgen) */
	  xgen_val = incx_gen;
	  ygen_val = incy_gen;
	  for (wgen_val = incw_gen; wgen_val < n * incw_gen;
	       wgen_val += incw_gen) {
	    BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &atemp, 1, &btemp,
			      1, &x_fix1_temp, &xtemp, seed, &ytemp, &wltemp,
			      &wttemp);

	    x_gen[xgen_val] = 0;
	    x_gen[xgen_val + 1] = xtemp;


	    y_gen[ygen_val] = ytemp;


	    head_w_true[wgen_val] = -wltemp;
	    head_w_true[wgen_val + 1] = wltemp;
	    tail_w_true[wgen_val] = 0.0;
	    tail_w_true[wgen_val + 1] = 0.0;
	    xgen_val += incx_gen;
	    ygen_val += incy_gen;
	  }

	  /* one more input set has been generated */
	  count++;


	  /* varying incx */
	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    /* setting incx: incx is the stride in floats (two floats per
	       element); incx_val is what is passed to the routine */
	    incx = incx_val;
	    incx *= 2;

	    /* set x starting index */
	    ix = 0;
	    if (incx < 0)
	      ix = -(n - 1) * incx;

	    /* copy x_gen to x */
	    for (j = 0; j < n * incx_gen; j += incx_gen) {
	      x_genj[0] = x_gen[j];
	      x_genj[1] = x_gen[1 + j];
	      x[ix] = x_genj[0];
	      x[1 + ix] = x_genj[1];
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      /* setting incy (y has one float per element, so no scaling) */
	      incy = incy_val;


	      /* set y starting index */
	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;

	      /* copy y_gen to y */
	      for (j = 0; j < n * incy_gen; j += incy_gen) {
		y_genj = y_gen[j];
		y[iy] = y_genj;
		iy += incy;
	      }

	      /* varying incw */
	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		/* setting incw: stride in floats, two floats per element */
		incw = incw_val;
		incw *= 2;

		/* For the sake of speed, we throw out this case at random */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_cwaxpby_c_s_x to get w; the FPU_FIX macros are
		   stopped around the call and restarted afterwards */
		FPU_FIX_STOP;
		BLAS_cwaxpby_c_s_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* computing the ratio */
		ix = 0;
		if (incx < 0)
		  ix = -(n - 1) * incx;
		iy = 0;
		if (incy < 0)
		  iy = -(n - 1) * incy;
		iw = 0;
		if (incw < 0)
		  iw = -(n - 1) * incw;
		ratio = 0.0;

		/* check w one element at a time (length argument is 1) and
		   keep the largest ratio.
		   NOTE(review): the argument roles of test_BLAS_cdot_s_s
		   are not visible in this file. */
		for (test_val = 0; test_val < n * incw_gen;
		     test_val += incw_gen) {
		  test_BLAS_cdot_s_s(1, blas_no_conj, beta, alpha, &x[ix],
				     &w[iw], &head_w_true[test_val],
				     &tail_w_true[test_val], &x_fix2, incy,
				     &y[iy], incy, eps_int, un_int,
				     &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;
		  if (MAX(ratio, new_ratio) == new_ratio) {
		    iymax = iy - incy;
		    ixmax = ix - incx;
		  }
		  ratio = MAX(ratio, new_ratio);
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  /* Note: (ratio > 0.5 * ratio_max) is false for a NaN
		     ratio, so such a case is counted but not printed. */
		  if ((debug == 3) &&	/* print only when debug is on */
		      (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    /* this is the current value of *seed, after the
		       generator calls above have used it */
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    /* incx and incw are printed in float units (twice the
		       values passed to the routine) */
		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    ix = 0;
		    iy = 0;
		    iw = 0;
		    if (incx < 0)
		      ix = -(n - 1) * incx;
		    if (incy < 0)
		      iy = -(n - 1) * incy;
		    if (incw < 0)
		      iw = -(n - 1) * incw;

		    /* dump x; y; w for every element (no newline between
		       elements) */
		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
		      printf("; ");
		      printf("%16.8e", y[iy]);
		      printf("; ");
		      printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}
		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  /* Summary line.  ratio_min is reset from its sentinel only inside this
     branch.  NOTE(review): tot_tests can be 0 if every case was skipped
     by the test_prob check; the printed quotient is then 0.0/0.0. */
  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* release every buffer obtained from blas_malloc above */
  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_cwaxpby_c_s_x */

double do_test_cwaxpby_s_c_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_s_c_x.  Inputs are generated
 * with BLAS_sdot_testgen, the routine under test is called for every
 * combination of internal precision, norm and incx/incy/incw, and each
 * element of the computed w is checked with test_BLAS_cdot_s_s.
 * In this variant y, w, alpha and beta occupy two floats per element
 * (presumably real and imaginary parts) and x occupies one float.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested.
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is handed to
 *           BLAS_sdot_testgen() and to xrand().
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (so a NaN ratio is counted as bad too).  Since ratio is
 *           supposed to be O(1), we can set thresh to ~10.
 *
 * debug     (input) int
 *           As implemented below:
 *           If debug=3, run every case twice; the second pass prints
 *                       full information about failing cases
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of
 *                       bad ratios > 0
 *           Any other value prints nothing.
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is executed only when
 *           xrand(seed) < test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  If no
 *           nonzero ratio was recorded the value stays at 1e308, unless
 *           the summary line is printed, in which case it is reset to 0.
 *           Set to 0 when n == 0 or ntests == 0.
 *
 * num_bad_ratio (output) int
 *               The number of cases whose ratio was not <= thresh,
 *               counted during the last pass of the debug loop.
 *
 * num_tests (output) int
 *           The number of cases actually executed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass; 0.0 when n == 0 or
 * ntests == 0.  No path in this function returns -1.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first pass computes the max ratio
 *              -- and the second (last) pass outputs debugging
 *              -- information if the test fails and its ratio is
 *              -- > 0.5 * max ratio.
 *              -- for any other debug value the loop is executed once
 *    prec loop   -- varying internal prec: single, double, or extra
 *      norm loop   -- varying norm: near underflow, near one, or
 *                    -- near overflow
 *        numtest loop  -- how many times the test is performed with
 *                        -- above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cwaxpby_s_c_x";

  /* max number of failure reports to print.  Note that the check below
     is (p_count <= max_print), so up to max_print + 1 are printed. */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val,	/* for testing different inc values */
    incx, incy, incw, gen_val, test_val;
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of cases executed in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  int X_int;			/* only assigned, never read here */
  double X;			/* only assigned, never read here */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double ratio;			/* the per-use test ratio from test() */
  double new_ratio;		/* ratio of the element just checked */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  float x_i;			/* only assigned, never read here */
  float y_i[2];			/* only assigned, never read here */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;			/* the w computed  by BLAS_cwaxpby_s_c_x */
  float x_fix1;			/* constant 1.0 passed to test_BLAS_cdot_s_s */
  float x_fix2[2];		/* only assigned, never read here */
  float zero[2];		/* only assigned, never read here */
  float one[2];			/* only assigned, never read here */
  float dummy[2];		/* only assigned, never read here */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *x_gen;
  float *y_gen;
  /* temp_ab and temp_xy are allocated and freed but not otherwise used
     in this function */
  float *temp_ab;
  float *temp_xy;


  /* added by DY */
  float x_genj;
  float y_genj[2];
  int incy_gen, incx_gen, incw_gen;	/* strides (in floats/doubles) of the
					   generated, unit-increment data */
  int xgen_val, ygen_val, wgen_val;
  int iymax, ixmax;		/* index of the worst element; written but
				   never read in this function */
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is incremented once per generated
				   input set; old_count is the value of count
				   at the last printed failure */

  /* FPU_FIX_DECL / FPU_FIX_START / FPU_FIX_STOP are macros defined outside
     this file chunk; presumably they save/alter/restore FPU state. */
  FPU_FIX_DECL;

  /* These are here to get rid of compiler warnings.
     Should modify M4 code to not even produce these variables when not
     needed. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;
  x_i = 0.0;
  y_i[0] = y_i[1] = 0.0;
  X = 0.0;
  X_int = 0;
  gen_val = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero
     (this happens before FPU_FIX_START, so no FPU_FIX_STOP is needed) */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* Strides of the generated data: y_gen and the true w use two entries
     per element, x_gen uses one. */
  incw_gen = 1;
  incx_gen = 1;
  incy_gen = 1;
  incw_gen *= 2;

  incy_gen *= 2;

  /* get space for calculation.
     y and w hold n elements of two floats each at |inc| up to 2
     (4 * n floats); x holds n single floats at |inc| up to 2
     (2 * n floats). */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (2 > 0 && temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float));
  if (2 > 0 && temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;		/* sentinel: no nonzero ratio seen yet */
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix1 = 1.0;
  x_fix2[0] = 1.0;
  x_fix2[1] = 0.0;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass starts again from the seed used by the first pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */


    /* varying extra precs: select the epsilon and underflow threshold
       matching the internal precision requested from the routine */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;
	break;
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* values near underflow, 1, or overflow */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* generate test inputs.
	     The first testgen call also produces atemp and btemp, from
	     which alpha and beta are built.  The scalar outputs are placed
	     into the two-float values as follows:
	     x = xtemp, alpha = (-atemp, atemp), y = (0, ytemp),
	     beta = (btemp, btemp), true w = (-wltemp, wltemp) with a
	     zero tail. */
	  x_fix1_temp = 1.0;
	  BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
			    &atemp, 0, &btemp, 0,
			    &x_fix1_temp, &xtemp, seed,
			    &ytemp, &wltemp, &wttemp);
	  x_gen[0] = xtemp;
	  alpha[0] = -atemp;
	  alpha[1] = atemp;

	  y_gen[0] = 0.0;
	  y_gen[1] = ytemp;
	  beta[0] = btemp;
	  beta[1] = btemp;


	  head_w_true[0] = -wltemp;
	  head_w_true[1] = wltemp;
	  tail_w_true[0] = 0.0;
	  tail_w_true[1] = 0.0;

	  /* remaining n - 1 elements; these calls pass 1 instead of 0
	     after &atemp and &btemp (NOTE(review): presumably this keeps
	     alpha and beta fixed -- confirm against BLAS_sdot_testgen) */
	  xgen_val = incx_gen;
	  ygen_val = incy_gen;
	  for (wgen_val = incw_gen; wgen_val < n * incw_gen;
	       wgen_val += incw_gen) {
	    BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &atemp, 1, &btemp,
			      1, &x_fix1_temp, &xtemp, seed, &ytemp, &wltemp,
			      &wttemp);

	    x_gen[xgen_val] = xtemp;

	    y_gen[ygen_val] = 0;
	    y_gen[ygen_val + 1] = ytemp;




	    head_w_true[wgen_val] = -wltemp;
	    head_w_true[wgen_val + 1] = wltemp;
	    tail_w_true[wgen_val] = 0.0;
	    tail_w_true[wgen_val + 1] = 0.0;
	    xgen_val += incx_gen;
	    ygen_val += incy_gen;
	  }

	  /* one more input set has been generated */
	  count++;


	  /* varying incx */
	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    /* setting incx (x has one float per element, so no scaling) */
	    incx = incx_val;


	    /* set x starting index */
	    ix = 0;
	    if (incx < 0)
	      ix = -(n - 1) * incx;

	    /* copy x_gen to x */
	    for (j = 0; j < n * incx_gen; j += incx_gen) {
	      x_genj = x_gen[j];
	      x[ix] = x_genj;
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      /* setting incy: incy is the stride in floats (two floats per
	         element); incy_val is what is passed to the routine */
	      incy = incy_val;
	      incy *= 2;

	      /* set y starting index */
	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;

	      /* copy y_gen to y */
	      for (j = 0; j < n * incy_gen; j += incy_gen) {
		y_genj[0] = y_gen[j];
		y_genj[1] = y_gen[1 + j];
		y[iy] = y_genj[0];
		y[1 + iy] = y_genj[1];
		iy += incy;
	      }

	      /* varying incw */
	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		/* setting incw: stride in floats, two floats per element */
		incw = incw_val;
		incw *= 2;

		/* For the sake of speed, we throw out this case at random */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_cwaxpby_s_c_x to get w; the FPU_FIX macros are
		   stopped around the call and restarted afterwards */
		FPU_FIX_STOP;
		BLAS_cwaxpby_s_c_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* computing the ratio */
		ix = 0;
		if (incx < 0)
		  ix = -(n - 1) * incx;
		iy = 0;
		if (incy < 0)
		  iy = -(n - 1) * incy;
		iw = 0;
		if (incw < 0)
		  iw = -(n - 1) * incw;
		ratio = 0.0;

		/* check w one element at a time (length argument is 1) and
		   keep the largest ratio.
		   NOTE(review): the argument roles of test_BLAS_cdot_s_s
		   are not visible in this file. */
		for (test_val = 0; test_val < n * incw_gen;
		     test_val += incw_gen) {
		  test_BLAS_cdot_s_s(1, blas_no_conj, alpha, beta, &y[iy],
				     &w[iw], &head_w_true[test_val],
				     &tail_w_true[test_val], &x_fix1, incx,
				     &x[ix], incx, eps_int, un_int,
				     &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;
		  if (MAX(ratio, new_ratio) == new_ratio) {
		    iymax = iy - incy;
		    ixmax = ix - incx;
		  }
		  ratio = MAX(ratio, new_ratio);
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  /* Note: (ratio > 0.5 * ratio_max) is false for a NaN
		     ratio, so such a case is counted but not printed. */
		  if ((debug == 3) &&	/* print only when debug is on */
		      (count != old_count) &&	/* print if old vector is different 
						   from the current one */
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    /* this is the current value of *seed, after the
		       generator calls above have used it */
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    /* incy and incw are printed in float units (twice the
		       values passed to the routine) */
		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    ix = 0;
		    iy = 0;
		    iw = 0;
		    if (incx < 0)
		      ix = -(n - 1) * incx;
		    if (incy < 0)
		      iy = -(n - 1) * incy;
		    if (incw < 0)
		      iw = -(n - 1) * incw;

		    /* dump x; y; w for every element (no newline between
		       elements) */
		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("%16.8e", x[ix]);
		      printf("; ");
		      printf("(%16.8e, %16.8e)", y[iy], y[iy + 1]);
		      printf("; ");
		      printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}
		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  /* Summary line.  ratio_min is reset from its sentinel only inside this
     branch.  NOTE(review): tot_tests can be 0 if every case was skipped
     by the test_prob check; the printed quotient is then 0.0/0.0. */
  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* release every buffer obtained from blas_malloc above */
  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_cwaxpby_s_c_x */

double do_test_cwaxpby_s_s_x(int n,
                             int ntests,
                             int *seed,
                             double thresh,
                             int debug, float test_prob,
                             double *min_ratio,
                             int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cwaxpby_s_s_x: real single-precision
 * x and y, complex single-precision alpha, beta and w.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           the test generator and by the random case filter below.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (so a NaN ratio is also counted as bad).
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice from the same seed; the
 *                       second pass prints full details of failing cases
 *                       whose ratio > 0.5 * max ratio (no summary line)
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio seen in the first pass.  If no
 *           non-zero ratio was seen this is 0.0 when the summary line was
 *           printed and the initial sentinel 1e308 otherwise.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio was not <= thresh
 *               (counted over the last pass executed).
 *
 * num_tests (output) int
 *           The number of test cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass; 0.0 if n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3 (first pass computes the
 *              -- max ratio, second pass prints debugging information),
 *              -- otherwise executed once
 *    prec loop   -- varying internal prec: single, double, or extra
 *      norm loop   -- varying norm: near underflow, near one, or
 *                    -- near overflow
 *        numtest loop  -- how many times the test is performed with
 *                        -- the above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cwaxpby_s_s_x";

  /* max number of debug lines to print */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;     /* increments passed to the routine */
  int incx, incy, incw;         /* the same increments in units of floats */
  int test_val;                 /* index into head_w_true / tail_w_true */
  int d_count;                  /* counter for debug */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* counter for the number of debug lines printed */
  int tot_tests;                /* total number of tests actually run */
  int norm;                     /* input values of near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum ratio */
  double ratio;                 /* the per-case ratio (max over elements) */
  double new_ratio;             /* the per-element ratio from the checker */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected--2^(-24) for float */
  double un_int;                /* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *x;
  float *y;
  float *w;                     /* the w computed  by BLAS_cwaxpby_s_s_x */
  float zero[2];
  float one[2];
  float dummy[2];

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *x_gen;
  float *y_gen;
  float *temp_ab;               /* holds (alpha, beta) for the element check */
  float *temp_xy;               /* holds (x_i, y_i) for the element check */

  float x_genj;
  float y_genj;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;
  float xtemp;
  float ytemp;
  float atemp;
  float btemp;
  double wltemp;
  double wttemp;
  float x_fix1_temp;

  /* the true w calculated by testgen(), in double-double */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count = number of generated vector sets;
                                   old_count = set last reported in debug output */

  FPU_FIX_DECL;

  /* Outputs of the test generator; initialized to keep compilers quiet. */
  xtemp = ytemp = atemp = btemp = 0.0;
  wltemp = wttemp = x_fix1_temp = 0.0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* Strides of the generated data: x_gen/y_gen are real (1 float per
     element), head_w_true/tail_w_true are complex (2 doubles per element). */
  incx_gen = 1;
  incy_gen = 1;
  incw_gen = 2;

  /* get space for calculation; the buffers are sized for |inc| = 2 */
  x = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(n * 2 * sizeof(float));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(n * sizeof(float));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (float *) blas_malloc(2 * sizeof(float) * 2);
  if (temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (float *) blas_malloc(2 * sizeof(float));
  if (temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* recounted on every pass */

    /* The second pass must replay exactly the same random sequence. */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */


    /* varying extra precs */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
        eps_int = power(2, -BITS_S);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
        prec = blas_prec_single;
        break;
      case 1:
        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;
        break;
      case 2:
      default:
        eps_int = power(2, -BITS_E);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
        prec = blas_prec_extra;
        break;
      }

      /* values near underflow, 1, or overflow */
      for (norm = -1; norm <= 1; norm++) {

        /* number of tests */
        for (i = 0; i < ntests; i++) {

          /* Generate test inputs.  The first call also produces the
             scalars atemp and btemp, from which the complex alpha and
             beta are built as (-atemp, atemp) and (-btemp, btemp). */
          x_fix1_temp = 1.0;
          BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj,
                            &atemp, 0, &btemp, 0,
                            &x_fix1_temp, &xtemp, seed,
                            &ytemp, &wltemp, &wttemp);
          x_gen[0] = xtemp;
          alpha[0] = -atemp;
          alpha[1] = atemp;

          y_gen[0] = ytemp;
          beta[0] = -btemp;
          beta[1] = btemp;

          /* Only the head returned by the generator is used; the tail of
             the reference result is set to zero. */
          head_w_true[0] = -wltemp;
          head_w_true[1] = wltemp;
          tail_w_true[0] = 0.0;
          tail_w_true[1] = 0.0;

          /* Remaining elements reuse atemp/btemp from the first call
             (the flags following &atemp and &btemp are 1 here instead
             of 0 -- presumably "already fixed"; confirm against the
             generator's documentation). */
          xgen_val = incx_gen;
          ygen_val = incy_gen;
          for (wgen_val = incw_gen; wgen_val < n * incw_gen;
               wgen_val += incw_gen) {
            BLAS_sdot_testgen(1, 0, 1, norm, blas_no_conj, &atemp, 1, &btemp,
                              1, &x_fix1_temp, &xtemp, seed, &ytemp, &wltemp,
                              &wttemp);

            x_gen[xgen_val] = xtemp;

            y_gen[ygen_val] = ytemp;


            head_w_true[wgen_val] = -wltemp;
            head_w_true[wgen_val + 1] = wltemp;
            tail_w_true[wgen_val] = 0.0;
            tail_w_true[wgen_val + 1] = 0.0;
            xgen_val += incx_gen;
            ygen_val += incy_gen;
          }

          count++;


          /* varying incx */
          for (incx_val = -2; incx_val <= 2; incx_val++) {
            if (incx_val == 0)
              continue;

            /* setting incx (x is real, so no scaling) */
            incx = incx_val;


            /* set x starting index */
            ix = 0;
            if (incx < 0)
              ix = -(n - 1) * incx;

            /* copy x_gen to x */
            for (j = 0; j < n * incx_gen; j += incx_gen) {
              x_genj = x_gen[j];
              x[ix] = x_genj;
              ix += incx;
            }

            /* varying incy */
            for (incy_val = -2; incy_val <= 2; incy_val++) {
              if (incy_val == 0)
                continue;

              /* setting incy (y is real, so no scaling) */
              incy = incy_val;


              /* set y starting index */
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;

              /* copy y_gen to y */
              for (j = 0; j < n * incy_gen; j += incy_gen) {
                y_genj = y_gen[j];
                y[iy] = y_genj;
                iy += incy;
              }

              /* varying incw */
              for (incw_val = -2; incw_val <= 2; incw_val++) {
                if (incw_val == 0)
                  continue;

                /* setting incw (w is complex: two floats per element) */
                incw = incw_val;
                incw *= 2;

                /* For the sake of speed, we throw out this case at random */
                if (xrand(seed) >= test_prob)
                  continue;

                /* call BLAS_cwaxpby_s_s_x to get w */
                FPU_FIX_STOP;
                BLAS_cwaxpby_s_s_x(n, alpha, x, incx_val, beta, y, incy_val,
                                   w, incw_val, prec);
                FPU_FIX_START;

                /* computing the ratio */
                ix = 0;
                if (incx < 0)
                  ix = -(n - 1) * incx;
                iy = 0;
                if (incy < 0)
                  iy = -(n - 1) * incy;
                iw = 0;
                if (incw < 0)
                  iw = -(n - 1) * incw;
                ratio = 0.0;

                /* Each w element is checked as the length-2 dot product
                   of (alpha, beta) with (x_i, y_i). */
                temp_ab[0] = alpha[0];
                temp_ab[0 + 1] = alpha[1];
                temp_ab[incw_gen] = beta[0];
                temp_ab[incw_gen + 1] = beta[1];

                for (test_val = 0; test_val < n * incw_gen;
                     test_val += incw_gen) {
                  x_genj = x[ix];
                  temp_xy[0] = x_genj;

                  y_genj = y[iy];
                  temp_xy[incy_gen] = y_genj;

                  test_BLAS_cdot_c_s(2, blas_no_conj, one, zero,
                                     dummy, &w[iw],
                                     &head_w_true[test_val],
                                     &tail_w_true[test_val], &temp_ab[0], 1,
                                     &temp_xy[0], 1, eps_int, un_int,
                                     &new_ratio);
                  ratio = MAX(ratio, new_ratio);

                  ix += incx;
                  iy += incy;
                  iw += incw;
                }

                /* Increase the number of bad ratio, if the ratio
                   is bigger than the threshold.
                   The !<= below causes NaN error to be detected.
                   Note that (NaN > thresh) is always false. */
                if (!(ratio <= thresh)) {
                  bad_ratios++;

                  if ((debug == 3) &&   /* print only when debug is on */
                      (count != old_count) &&   /* print if old vector is different
                                                   from the current one */
                      (d_count == find_max_ratio) &&
                      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                    old_count = count;

                    printf
                      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                       fname, n, ntests, thresh);
                    printf("seed = %d\n", *seed);
                    printf("norm = %d\n", norm);

                    /* Print test info */
                    switch (prec) {
                    case blas_prec_single:
                      printf("single ");
                      break;
                    case blas_prec_double:
                      printf("double ");
                      break;
                    case blas_prec_indigenous:
                      printf("indigenous ");
                      break;
                    case blas_prec_extra:
                      printf("extra ");
                      break;
                    }
                    switch (norm) {
                    case -1:
                      printf("near_underflow ");
                      break;
                    case 0:
                      printf("near_one ");
                      break;
                    case 1:
                      printf("near_overflow ");
                      break;
                    }

                    /* Note: incw printed here is the float-unit stride
                       (twice the incw passed to the routine). */
                    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                    ix = 0;
                    iy = 0;
                    iw = 0;
                    if (incx < 0)
                      ix = -(n - 1) * incx;
                    if (incy < 0)
                      iy = -(n - 1) * incy;
                    if (incw < 0)
                      iw = -(n - 1) * incw;

                    for (j = 0; j < n; j++) {
                      printf("      ");
                      printf("%16.8e", x[ix]);
                      printf("; ");
                      printf("%16.8e", y[iy]);
                      printf("; ");
                      printf("(%16.8e, %16.8e)", w[iw], w[iw + 1]);
                      printf("; ");
                      ix += incx;
                      iy += incy;
                      iw += incw;
                    }

                    printf("      ");
                    printf("alpha = ");
                    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
                    printf("; ");
                    printf("beta = ");
                    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
                    printf("\n");
                    printf("      ratio=%.4e\n", ratio);
                    p_count++;
                  }
                }

                /* Statistics are gathered in the first pass only, so the
                   debug replay does not count cases twice. */
                if (d_count == 0) {

                  if (ratio > ratio_max)
                    ratio_max = ratio;

                  if (ratio != 0.0 && ratio < ratio_min)
                    ratio_min = ratio;

                  tot_tests++;
                }
              }                 /* incw */
            }                   /* incy */
          }                     /* incx */
        }                       /* tests */
      }                         /* norm */
    }                           /* prec */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* Every case may have been skipped by the test_prob filter; report a
       failure fraction of 0 instead of printing the NaN from 0/0. */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}                               /* end of do_test_cwaxpby_s_s_x */

double do_test_zwaxpby_z_d_x(int n,
                             int ntests,
                             int *seed,
                             double thresh,
                             int debug, float test_prob,
                             double *min_ratio,
                             int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_z_d_x: complex double-precision
 * x, alpha, beta and w, real double-precision y.
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           the input generation and by the random case filter below.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (so a NaN ratio is also counted as bad).
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice from the same seed; the
 *                       second pass prints full details of failing cases
 *                       whose ratio > 0.5 * max ratio (no summary line)
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           An (incx, incy, incw) case is run only when
 *           xrand(seed) < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio seen in the first pass.  If no
 *           non-zero ratio was seen this is 0.0 when the summary line was
 *           printed and the initial sentinel 1e308 otherwise.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio was not <= thresh
 *               (counted over the last pass executed).
 *
 * num_tests (output) int
 *           The number of test cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass; 0.0 if n == 0 or ntests == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3 (first pass computes the
 *              -- max ratio, second pass prints debugging information),
 *              -- otherwise executed once
 *    prec loop   -- varying internal prec: double, double, or extra
 *      norm loop   -- three repetitions; norm is only reported in the
 *                    -- debug output, the generated inputs do not use it
 *        numtest loop  -- how many times the test is performed with
 *                        -- the above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zwaxpby_z_d_x";

  /* max number of debug lines to print */
  const int max_print = 32;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;                        /* iterate through the repeating tests */
  int j;                        /* multipurpose counter */
  int ix, iy, iw;               /* use to index x, y, w respectively */
  int incx_val, incy_val, incw_val;     /* increments passed to the routine */
  int incx, incy, incw;         /* the same increments in units of doubles */
  int test_val;                 /* index into head_w_true / tail_w_true */
  int d_count;                  /* counter for debug */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* counter for the number of debug lines printed */
  int tot_tests;                /* total number of tests actually run */
  int norm;                     /* loop counter -1..1; see header note */
  int X_int;
  double X;
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum ratio */
  double ratio;                 /* the per-case ratio (max over elements) */
  double new_ratio;             /* the per-element ratio from the checker */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected */
  double un_int;                /* the internal underflow threshold */
  double x_i[2];
  double y_i;
  double alpha[2];
  double beta[2];
  double *x;
  double *y;
  double *w;                    /* the w computed  by BLAS_zwaxpby_z_d_x */
  double x_fix2;                /* constant 1.0 used as the real "x" of the
                                   one-term dot product in the check */

  /* x_gen and y_gen are used to store the generated vectors.
     they eventually are copied back to x and y */
  double *x_gen;
  double *y_gen;

  double x_genj[2];
  double y_genj;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w, in double-double */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count = number of generated vector sets;
                                   old_count = set last reported in debug output */

  FPU_FIX_DECL;

  x_i[0] = x_i[1] = 0.0;
  y_i = 0.0;
  X = 0.0;
  X_int = 0;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* Strides of the generated data: x_gen and the reference w are complex
     (2 doubles per element), y_gen is real. */
  incx_gen = 2;
  incy_gen = 1;
  incw_gen = 2;

  /* get space for calculation; the buffers are sized for |inc| = 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n * 2 > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;
  x_fix2 = 1.0;


  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* recounted on every pass */

    /* The second pass must replay exactly the same random sequence. */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */


    /* varying extra precs */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
        /* Both of the first two settings test double internal precision. */
        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;
        break;
      case 2:
      default:
        eps_int = power(2, -BITS_E);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
        prec = blas_prec_extra;
        break;
      }

      /* three repetitions; the inputs below do not depend on norm */
      for (norm = -1; norm <= 1; norm++) {

        /* number of tests */
        for (i = 0; i < ntests; i++) {

          /* Generate test inputs from one random integer X (the random
             value scaled by 2^12 - 1 and truncated).  Assuming
             power(2, k) is 2^k, with
               alpha = (1 + i) * X^4 / 2^48,  x_i = i * X^2 / 2^24,
               beta  = (-1 + i) * (X^4 + X^2 + 1) / 2^48,
               y_i   = -(X^2 - 1) / 2^24,
             the exact value of alpha * x_i + beta * y_i is
             (-1 + i) / 2^72 for every X, because the X^6 terms cancel.
             This is the reference value stored in head_w_true. */
          X = xrand(seed);
          X_int = X * (power(2, 12) - 1);
          X = X_int;

          alpha[0] = X * X * X * X / power(2, 48);
          alpha[1] = X * X * X * X / power(2, 48);
          x_i[0] = 0.0;
          x_i[1] = X * X / power(2, 24);

          beta[0] = -(X * X + X + 1) * (X * X - X + 1) / power(2, 48);
          beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
          y_i = -(X * X - 1) / power(2, 24);


          /* every element of the generated vectors is the same */
          xgen_val = 0;
          ygen_val = 0;
          for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
            x_gen[xgen_val] = x_i[0];
            x_gen[1 + xgen_val] = x_i[1];
            y_gen[ygen_val] = y_i;
            head_w_true[wgen_val] = -1.0 / power(2, 72);
            head_w_true[wgen_val + 1] = 1.0 / power(2, 72);
            tail_w_true[wgen_val] = 0.0;
            tail_w_true[wgen_val + 1] = 0.0;
            xgen_val += incx_gen;
            ygen_val += incy_gen;
          }

          count++;


          /* varying incx */
          for (incx_val = -2; incx_val <= 2; incx_val++) {
            if (incx_val == 0)
              continue;

            /* setting incx (x is complex: two doubles per element) */
            incx = incx_val;
            incx *= 2;

            /* set x starting index */
            ix = 0;
            if (incx < 0)
              ix = -(n - 1) * incx;

            /* copy x_gen to x */
            for (j = 0; j < n * incx_gen; j += incx_gen) {
              x_genj[0] = x_gen[j];
              x_genj[1] = x_gen[1 + j];
              x[ix] = x_genj[0];
              x[1 + ix] = x_genj[1];
              ix += incx;
            }

            /* varying incy */
            for (incy_val = -2; incy_val <= 2; incy_val++) {
              if (incy_val == 0)
                continue;

              /* setting incy (y is real, so no scaling) */
              incy = incy_val;


              /* set y starting index */
              iy = 0;
              if (incy < 0)
                iy = -(n - 1) * incy;

              /* copy y_gen to y */
              for (j = 0; j < n * incy_gen; j += incy_gen) {
                y_genj = y_gen[j];
                y[iy] = y_genj;
                iy += incy;
              }

              /* varying incw */
              for (incw_val = -2; incw_val <= 2; incw_val++) {
                if (incw_val == 0)
                  continue;

                /* setting incw (w is complex: two doubles per element) */
                incw = incw_val;
                incw *= 2;

                /* For the sake of speed, we throw out this case at random */
                if (xrand(seed) >= test_prob)
                  continue;

                /* call BLAS_zwaxpby_z_d_x to get w */
                FPU_FIX_STOP;
                BLAS_zwaxpby_z_d_x(n, alpha, x, incx_val, beta, y, incy_val,
                                   w, incw_val, prec);
                FPU_FIX_START;

                /* computing the ratio */
                ix = 0;
                if (incx < 0)
                  ix = -(n - 1) * incx;
                iy = 0;
                if (incy < 0)
                  iy = -(n - 1) * incy;
                iw = 0;
                if (incw < 0)
                  iw = -(n - 1) * incw;
                ratio = 0.0;

                /* Each w element is checked as a one-term dot product:
                   the checker receives beta and alpha in its two scalar
                   slots, x[ix] as the input scalar, and the vectors
                   (1.0) and (y[iy]). */
                for (test_val = 0; test_val < n * incw_gen;
                     test_val += incw_gen) {
                  test_BLAS_zdot_d_d(1, blas_no_conj, beta, alpha, &x[ix],
                                     &w[iw], &head_w_true[test_val],
                                     &tail_w_true[test_val], &x_fix2, incy,
                                     &y[iy], incy, eps_int, un_int,
                                     &new_ratio);
                  ix += incx;
                  iy += incy;
                  iw += incw;
                  ratio = MAX(ratio, new_ratio);
                }

                /* Increase the number of bad ratio, if the ratio
                   is bigger than the threshold.
                   The !<= below causes NaN error to be detected.
                   Note that (NaN > thresh) is always false. */
                if (!(ratio <= thresh)) {
                  bad_ratios++;

                  if ((debug == 3) &&   /* print only when debug is on */
                      (count != old_count) &&   /* print if old vector is different
                                                   from the current one */
                      (d_count == find_max_ratio) &&
                      (p_count <= max_print) && (ratio > 0.5 * ratio_max)) {
                    old_count = count;

                    printf
                      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
                       fname, n, ntests, thresh);
                    printf("seed = %d\n", *seed);
                    printf("norm = %d\n", norm);

                    /* Print test info */
                    switch (prec) {
                    case blas_prec_single:
                      printf("single ");
                      break;
                    case blas_prec_double:
                      printf("double ");
                      break;
                    case blas_prec_indigenous:
                      printf("indigenous ");
                      break;
                    case blas_prec_extra:
                      printf("extra ");
                      break;
                    }
                    switch (norm) {
                    case -1:
                      printf("near_underflow ");
                      break;
                    case 0:
                      printf("near_one ");
                      break;
                    case 1:
                      printf("near_overflow ");
                      break;
                    }

                    /* Note: incx and incw printed here are the
                       double-unit strides (twice the values passed to
                       the routine). */
                    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

                    ix = 0;
                    iy = 0;
                    iw = 0;
                    if (incx < 0)
                      ix = -(n - 1) * incx;
                    if (incy < 0)
                      iy = -(n - 1) * incy;
                    if (incw < 0)
                      iw = -(n - 1) * incw;

                    for (j = 0; j < n; j++) {
                      printf("      ");
                      printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
                      printf("; ");
                      printf("%24.16e", y[iy]);
                      printf("; ");
                      printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
                      printf("; ");
                      ix += incx;
                      iy += incy;
                      iw += incw;
                    }

                    printf("      ");
                    printf("alpha = ");
                    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
                    printf("; ");
                    printf("beta = ");
                    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
                    printf("\n");
                    printf("      ratio=%.4e\n", ratio);
                    p_count++;
                  }
                }

                /* Statistics are gathered in the first pass only, so the
                   debug replay does not count cases twice. */
                if (d_count == 0) {

                  if (ratio > ratio_max)
                    ratio_max = ratio;

                  if (ratio != 0.0 && ratio < ratio_min)
                    ratio_min = ratio;

                  tot_tests++;
                }
              }                 /* incw */
            }                   /* incy */
          }                     /* incx */
        }                       /* tests */
      }                         /* norm */
    }                           /* prec */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
           fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;
    /* Every case may have been skipped by the test_prob filter; report a
       failure fraction of 0 instead of printing the NaN from 0/0. */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}                               /* end of do_test_zwaxpby_z_d_x */

double do_test_zwaxpby_d_z_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_d_z_x (waxpby with a real double
 * x, complex double y and w, complex double alpha and beta).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator (xrand).  It is
 *           advanced by every draw; when debug == 3 it is reset to its
 *           entry value before the second pass.
 *
 * thresh    (input) double
 *           A test case is counted as bad when !(ratio <= thresh), so a
 *           NaN ratio is counted as bad as well.
 *
 * debug     (input) int
 *           If debug=3, run two passes and print complete info for the
 *                       failing cases during the second pass
 *           If debug=2, always print the summary
 *           If debug=1, print the summary only if there are bad ratios
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is run only when a fresh random
 *           draw is below test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio seen in the first pass.  If none
 *           was recorded it is 1e308, or 0.0 when the summary was printed.
 * 
 * num_bad_ratio (output) int
 *               The number of cases in the last pass whose ratio is not
 *               <= thresh.
 *
 * num_tests (output) int
 *           The number of cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass (0.0 if n == 0 or ntests == 0).
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass (seed restored) prints the failing
 *              -- cases whose ratio exceeds 0.5 * max ratio;
 *              -- otherwise the loop is executed once
 *    prec loop   -- prec_val 0 and 1 select double, 2 selects extra
 *      norm loop   -- -1, 0, 1; the value is only echoed in the reports
 *        numtest loop  -- how many times the test is performed with the
 *                      -- above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* name of the routine under test, used in error and report messages */
  const char fname[] = "BLAS_zwaxpby_d_z_x";

  /* failure reports stop once p_count exceeds this value */
  const int max_print = 32;

  int i;			/* iterates over the repeated tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* indices into x, y, w */
  int incx_val, incy_val, incw_val;	/* increments handed to the routine */
  int incx, incy, incw;		/* strides in units of double */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* pass counter of the debug loop */
  int find_max_ratio;		/* 1 only if debug == 3 (two passes) */
  int p_count;			/* number of failure reports printed */
  int tot_tests;		/* number of cases run in the first pass */
  int norm;			/* -1, 0, 1; only echoed in the reports */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* worst per-element ratio of one case */
  double new_ratio;		/* ratio of a single element */
  double bad_frac;		/* bad_ratios / tot_tests for the summary */
  double w_true_mag;		/* magnitude of each part of the true w */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected */
  double un_int;		/* the internal underflow threshold */
  double x_i;
  double y_i[2];
  double alpha[2];
  double beta[2];
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_zwaxpby_d_z_x */
  double x_fix1;		/* constant 1.0 handed to the checker */

  /* x_gen and y_gen hold the generated vectors with unit element stride;
     they are copied into x and y for every increment tested */
  double *x_gen;
  double *y_gen;
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w, stored as head/tail (double-double) complex pairs */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* generated-vector counters; a vector is
				   reported at most once */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* strides of the generator-side arrays in doubles: x_gen is real,
     y_gen and the true w are complex */
  incx_gen = 1;
  incy_gen = 2;
  incw_gen = 2;

  /* get space for calculation; x, y and w are sized for |inc| == 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;
  x_fix1 = 1.0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* replay the same random sequence */

    /* varying internal precision */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* norm is carried along for the reports only; the data generated
         below does not depend on it */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* generate test inputs: X is a random integer in [0, 2^12 - 1]
	     and every element of x and y gets the same value.  With
	     alpha = (-X^4 + i X^4) / 2^48,  x = X^2 / 2^24,
	     beta  = (1 + i) (X^4 + X^2 + 1) / 2^48,
	     y     = -i (X^2 - 1) / 2^24,
	     the large terms of alpha*x + beta*y cancel and leave
	     (-1 + i) / 2^72. */
	  X = xrand(seed);
	  X_int = X * (power(2, 12) - 1);
	  X = X_int;

	  alpha[0] = -X * X * X * X / power(2, 48);
	  alpha[1] = X * X * X * X / power(2, 48);
	  x_i = X * X / power(2, 24);

	  beta[0] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  y_i[0] = 0.0;
	  y_i[1] = -(X * X - 1) / power(2, 24);

	  w_true_mag = 1.0 / power(2, 72);

	  xgen_val = 0;
	  ygen_val = 0;
	  for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	    x_gen[xgen_val] = x_i;
	    y_gen[ygen_val] = y_i[0];
	    y_gen[1 + ygen_val] = y_i[1];
	    head_w_true[wgen_val] = -w_true_mag;
	    head_w_true[wgen_val + 1] = w_true_mag;
	    tail_w_true[wgen_val] = 0.0;
	    tail_w_true[wgen_val + 1] = 0.0;
	    xgen_val += incx_gen;
	    ygen_val += incy_gen;
	  }

	  count++;

	  /* varying incx */
	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    /* x is real: stride in doubles equals the increment */
	    incx = incx_val;

	    /* a negative stride starts at the far end of the vector */
	    ix = 0;
	    if (incx < 0)
	      ix = -(n - 1) * incx;

	    /* copy x_gen to x */
	    for (j = 0; j < n * incx_gen; j += incx_gen) {
	      x[ix] = x_gen[j];
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      /* y is complex: two doubles per element */
	      incy = incy_val;
	      incy *= 2;

	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;

	      /* copy y_gen to y */
	      for (j = 0; j < n * incy_gen; j += incy_gen) {
		y[iy] = y_gen[j];
		y[1 + iy] = y_gen[1 + j];
		iy += incy;
	      }

	      /* varying incw */
	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		/* w is complex: two doubles per element */
		incw = incw_val;
		incw *= 2;

		/* For the sake of speed, we throw out this case at random */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_zwaxpby_d_z_x to get w; the FPU_FIX state of
		   the harness is dropped around the call */
		FPU_FIX_STOP;
		BLAS_zwaxpby_d_z_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* computing the ratio */
		ix = 0;
		if (incx < 0)
		  ix = -(n - 1) * incx;
		iy = 0;
		if (incy < 0)
		  iy = -(n - 1) * incy;
		iw = 0;
		if (incw < 0)
		  iw = -(n - 1) * incw;
		ratio = 0.0;

		/* every element is checked on its own as a length-1 dot
		   product (presumably alpha * (x_fix1 * x) + beta * y --
		   confirm against test_BLAS_zdot_d_d) */
		for (test_val = 0; test_val < n * incw_gen;
		     test_val += incw_gen) {
		  test_BLAS_zdot_d_d(1, blas_no_conj, alpha, beta, &y[iy],
				     &w[iw], &head_w_true[test_val],
				     &tail_w_true[test_val], &x_fix1, incx,
				     &x[ix], incx, eps_int, un_int,
				     &new_ratio);
		  ix += incx;
		  iy += incy;
		  iw += incw;

		  /* Running maximum that keeps a NaN once one is seen.
		     A plain max would let the ratio of a later element
		     replace an earlier NaN, so the NaN check below would
		     only catch a NaN in the last element. */
		  if ((new_ratio != new_ratio) || (new_ratio > ratio))
		    ratio = new_ratio;
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  /* Report only in the second pass of debug == 3, once per
		     generated vector, while the report budget lasts, and
		     only for the worse half of the failures.  The last
		     test is written with !<= so a NaN ratio is reported
		     as well. */
		  if ((debug == 3) &&
		      (count != old_count) &&
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) &&
		      !(ratio <= 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    /* note: incy and incw are printed as strides in
		       doubles, i.e. twice the increments passed above */
		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    ix = 0;
		    iy = 0;
		    iw = 0;
		    if (incx < 0)
		      ix = -(n - 1) * incx;
		    if (incy < 0)
		      iy = -(n - 1) * incy;
		    if (incw < 0)
		      iw = -(n - 1) * incw;

		    /* dump x; y; w element by element */
		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("%24.16e", x[ix]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", y[iy], y[iy + 1]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}

		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* every case may have been skipped at random; avoid printing 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_d_z_x */

double do_test_zwaxpby_d_d_x(int n,
			     int ntests,
			     int *seed,
			     double thresh,
			     int debug, float test_prob,
			     double *min_ratio,
			     int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zwaxpby_d_d_x (waxpby with real double
 * x and y, complex double w, complex double alpha and beta).
 *
 * Arguments
 * =========
 *
 * n         (input) int
 *           The size of vector being tested
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator (xrand).  It is
 *           advanced by every draw; when debug == 3 it is reset to its
 *           entry value before the second pass.
 *
 * thresh    (input) double
 *           A test case is counted as bad when !(ratio <= thresh), so a
 *           NaN ratio is counted as bad as well.
 *
 * debug     (input) int
 *           If debug=3, run two passes and print complete info for the
 *                       failing cases during the second pass
 *           If debug=2, always print the summary
 *           If debug=1, print the summary only if there are bad ratios
 *           If debug=0, print nothing
 *
 * test_prob (input) float
 *           Each (incx, incy, incw) case is run only when a fresh random
 *           draw is below test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio seen in the first pass.  If none
 *           was recorded it is 1e308, or 0.0 when the summary was printed.
 * 
 * num_bad_ratio (output) int
 *               The number of cases in the last pass whose ratio is not
 *               <= thresh.
 *
 * num_tests (output) int
 *           The number of cases actually run in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass (0.0 if n == 0 or ntests == 0).
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass (seed restored) prints the failing
 *              -- cases whose ratio exceeds 0.5 * max ratio;
 *              -- otherwise the loop is executed once
 *    prec loop   -- prec_val 0 and 1 select double, 2 selects extra
 *      norm loop   -- -1, 0, 1; the value is only echoed in the reports
 *        numtest loop  -- how many times the test is performed with the
 *                      -- above set of attributes
 *          incx loop     -- varying incx: -2, -1, 1, 2
 *            incy loop     -- varying incy: -2, -1, 1, 2
 *              incw loop     -- varying incw: -2, -1, 1, 2
 */
{
  /* name of the routine under test, used in error and report messages */
  const char fname[] = "BLAS_zwaxpby_d_d_x";

  /* failure reports stop once p_count exceeds this value */
  const int max_print = 32;

  int i;			/* iterates over the repeated tests */
  int j;			/* multipurpose counter */
  int ix, iy, iw;		/* indices into x, y, w */
  int incx_val, incy_val, incw_val;	/* increments handed to the routine */
  int incx, incy, incw;		/* strides in units of double */
  int test_val;			/* index into head_w_true / tail_w_true */
  int d_count;			/* pass counter of the debug loop */
  int find_max_ratio;		/* 1 only if debug == 3 (two passes) */
  int p_count;			/* number of failure reports printed */
  int tot_tests;		/* number of cases run in the first pass */
  int norm;			/* -1, 0, 1; only echoed in the reports */
  int X_int;
  double X;
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double ratio;			/* worst per-element ratio of one case */
  double new_ratio;		/* ratio of a single element */
  double bad_frac;		/* bad_ratios / tot_tests for the summary */
  double w_true_mag;		/* magnitude of each part of the true w */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected */
  double un_int;		/* the internal underflow threshold */
  double x_i;
  double y_i;
  double alpha[2];
  double beta[2];
  double *x;
  double *y;
  double *w;			/* the w computed by BLAS_zwaxpby_d_d_x */
  double zero[2];		/* complex 0, handed to the checker */
  double one[2];		/* complex 1, handed to the checker */
  double dummy[2];		/* complex 0, handed to the checker */

  /* x_gen and y_gen hold the generated vectors with unit element stride;
     they are copied into x and y for every increment tested */
  double *x_gen;
  double *y_gen;
  double *temp_ab;		/* the complex pair (alpha, beta) */
  double *temp_xy;		/* the real pair (x[k], y[k]) */
  int incy_gen, incx_gen, incw_gen;
  int xgen_val, ygen_val, wgen_val;

  /* the true w, stored as head/tail (double-double) complex pairs */
  double *head_w_true, *tail_w_true;

  int prec_val;
  enum blas_prec_type prec;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* generated-vector counters; a vector is
				   reported at most once */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0)
    BLAS_error(fname, -1, n, NULL);
  if (ntests < 0)
    BLAS_error(fname, -2, ntests, NULL);

  /* if there is nothing to test, return all zero */
  if (n == 0 || ntests == 0) {
    *min_ratio = 0.0;
    *num_bad_ratio = 0;
    *num_tests = 0;
    return 0.0;
  }

  FPU_FIX_START;

  /* strides of the generator-side arrays in doubles: x_gen and y_gen are
     real, the true w is complex */
  incx_gen = 1;
  incy_gen = 1;
  incw_gen = 2;

  /* get space for calculation; x, y and w are sized for |inc| == 2 */
  x = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n > 0 && x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(n * 2 * sizeof(double));
  if (n > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  w = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
  if (n > 0 && w == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  tail_w_true = (double *) blas_malloc(n * sizeof(double) * 2);
  if (n > 0 && (head_w_true == NULL || tail_w_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  x_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(n * sizeof(double));
  if (n > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_ab = (double *) blas_malloc(2 * sizeof(double) * 2);
  if (temp_ab == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp_xy = (double *) blas_malloc(2 * sizeof(double));
  if (temp_xy == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* initialization */
  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  old_count = 0;
  bad_ratios = 0;

  find_max_ratio = 0;
  if (debug == 3)
    find_max_ratio = 1;

  zero[0] = zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  dummy[0] = dummy[1] = 0.0;

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* replay the same random sequence */

    /* varying internal precision */
    for (prec_val = 0; prec_val <= 2; prec_val++) {
      switch (prec_val) {
      case 0:
      case 1:
	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;
	break;
      case 2:
      default:
	eps_int = power(2, -BITS_E);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	prec = blas_prec_extra;
	break;
      }

      /* norm is carried along for the reports only; the data generated
         below does not depend on it */
      for (norm = -1; norm <= 1; norm++) {

	/* number of tests */
	for (i = 0; i < ntests; i++) {

	  /* generate test inputs: X is a random integer in [0, 2^12 - 1]
	     and every element of x and y gets the same value.  With
	     alpha = (-1 + i) X^4 / 2^48,  x = X^2 / 2^24,
	     beta  = (-1 + i) (X^4 + X^2 + 1) / 2^48,
	     y     = -(X^2 - 1) / 2^24,
	     the large terms of alpha*x + beta*y cancel and leave
	     (-1 + i) / 2^72. */
	  X = xrand(seed);
	  X_int = X * (power(2, 12) - 1);
	  X = X_int;

	  alpha[0] = -X * X * X * X / power(2, 48);
	  alpha[1] = X * X * X * X / power(2, 48);
	  x_i = X * X / power(2, 24);

	  beta[0] = -(X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  beta[1] = (X * X + X + 1) * (X * X - X + 1) / power(2, 48);
	  y_i = -(X * X - 1) / power(2, 24);

	  w_true_mag = 1.0 / power(2, 72);

	  xgen_val = 0;
	  ygen_val = 0;
	  for (wgen_val = 0; wgen_val < n * incw_gen; wgen_val += incw_gen) {
	    x_gen[xgen_val] = x_i;
	    y_gen[ygen_val] = y_i;
	    head_w_true[wgen_val] = -w_true_mag;
	    head_w_true[wgen_val + 1] = w_true_mag;
	    tail_w_true[wgen_val] = 0.0;
	    tail_w_true[wgen_val + 1] = 0.0;
	    xgen_val += incx_gen;
	    ygen_val += incy_gen;
	  }

	  count++;

	  /* varying incx */
	  for (incx_val = -2; incx_val <= 2; incx_val++) {
	    if (incx_val == 0)
	      continue;

	    /* x is real: stride in doubles equals the increment */
	    incx = incx_val;

	    /* a negative stride starts at the far end of the vector */
	    ix = 0;
	    if (incx < 0)
	      ix = -(n - 1) * incx;

	    /* copy x_gen to x */
	    for (j = 0; j < n * incx_gen; j += incx_gen) {
	      x[ix] = x_gen[j];
	      ix += incx;
	    }

	    /* varying incy */
	    for (incy_val = -2; incy_val <= 2; incy_val++) {
	      if (incy_val == 0)
		continue;

	      /* y is real: stride in doubles equals the increment */
	      incy = incy_val;

	      iy = 0;
	      if (incy < 0)
		iy = -(n - 1) * incy;

	      /* copy y_gen to y */
	      for (j = 0; j < n * incy_gen; j += incy_gen) {
		y[iy] = y_gen[j];
		iy += incy;
	      }

	      /* varying incw */
	      for (incw_val = -2; incw_val <= 2; incw_val++) {
		if (incw_val == 0)
		  continue;

		/* w is complex: two doubles per element */
		incw = incw_val;
		incw *= 2;

		/* For the sake of speed, we throw out this case at random */
		if (xrand(seed) >= test_prob)
		  continue;

		/* call BLAS_zwaxpby_d_d_x to get w; the FPU_FIX state of
		   the harness is dropped around the call */
		FPU_FIX_STOP;
		BLAS_zwaxpby_d_d_x(n, alpha, x, incx_val, beta, y, incy_val,
				   w, incw_val, prec);
		FPU_FIX_START;

		/* computing the ratio */
		ix = 0;
		if (incx < 0)
		  ix = -(n - 1) * incx;
		iy = 0;
		if (incy < 0)
		  iy = -(n - 1) * incy;
		iw = 0;
		if (incw < 0)
		  iw = -(n - 1) * incw;
		ratio = 0.0;

		/* the complex operand (alpha, beta) is the same for every
		   element of this case */
		temp_ab[0] = alpha[0];
		temp_ab[0 + 1] = alpha[1];
		temp_ab[incw_gen] = beta[0];
		temp_ab[incw_gen + 1] = beta[1];

		/* every element is checked on its own as a length-2 dot
		   product of (alpha, beta) with (x[k], y[k]) (presumably
		   with the checker's own alpha = 1 and beta = 0 --
		   confirm against test_BLAS_zdot_z_d) */
		for (test_val = 0; test_val < n * incw_gen;
		     test_val += incw_gen) {
		  temp_xy[0] = x[ix];
		  temp_xy[incy_gen] = y[iy];

		  test_BLAS_zdot_z_d(2, blas_no_conj, one, zero,
				     dummy, &w[iw],
				     &head_w_true[test_val],
				     &tail_w_true[test_val], &temp_ab[0], 1,
				     &temp_xy[0], 1, eps_int, un_int,
				     &new_ratio);

		  /* Running maximum that keeps a NaN once one is seen.
		     A plain max would let the ratio of a later element
		     replace an earlier NaN, so the NaN check below would
		     only catch a NaN in the last element. */
		  if ((new_ratio != new_ratio) || (new_ratio > ratio))
		    ratio = new_ratio;

		  ix += incx;
		  iy += incy;
		  iw += incw;
		}

		/* Increase the number of bad ratio, if the ratio
		   is bigger than the threshold.
		   The !<= below causes NaN error to be detected.
		   Note that (NaN > thresh) is always false. */
		if (!(ratio <= thresh)) {
		  bad_ratios++;

		  /* Report only in the second pass of debug == 3, once per
		     generated vector, while the report budget lasts, and
		     only for the worse half of the failures.  The last
		     test is written with !<= so a NaN ratio is reported
		     as well. */
		  if ((debug == 3) &&
		      (count != old_count) &&
		      (d_count == find_max_ratio) &&
		      (p_count <= max_print) &&
		      !(ratio <= 0.5 * ratio_max)) {
		    old_count = count;

		    printf
		      ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
		       fname, n, ntests, thresh);
		    printf("seed = %d\n", *seed);
		    printf("norm = %d\n", norm);

		    /* Print test info */
		    switch (prec) {
		    case blas_prec_single:
		      printf("single ");
		      break;
		    case blas_prec_double:
		      printf("double ");
		      break;
		    case blas_prec_indigenous:
		      printf("indigenous ");
		      break;
		    case blas_prec_extra:
		      printf("extra ");
		      break;
		    }
		    switch (norm) {
		    case -1:
		      printf("near_underflow ");
		      break;
		    case 0:
		      printf("near_one ");
		      break;
		    case 1:
		      printf("near_overflow ");
		      break;
		    }

		    /* note: incw is printed as a stride in doubles, i.e.
		       twice the increment passed above */
		    printf("incx=%d, incy=%d, incw=%d:\n", incx, incy, incw);

		    ix = 0;
		    iy = 0;
		    iw = 0;
		    if (incx < 0)
		      ix = -(n - 1) * incx;
		    if (incy < 0)
		      iy = -(n - 1) * incy;
		    if (incw < 0)
		      iw = -(n - 1) * incw;

		    /* dump x; y; w element by element */
		    for (j = 0; j < n; j++) {
		      printf("      ");
		      printf("%24.16e", x[ix]);
		      printf("; ");
		      printf("%24.16e", y[iy]);
		      printf("; ");
		      printf("(%24.16e, %24.16e)", w[iw], w[iw + 1]);
		      printf("; ");
		      ix += incx;
		      iy += incy;
		      iw += incw;
		    }

		    printf("      ");
		    printf("alpha = ");
		    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
		    printf("; ");
		    printf("beta = ");
		    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
		    printf("\n");
		    printf("      ratio=%.4e\n", ratio);
		    p_count++;
		  }
		}

		/* statistics are gathered in the first pass only */
		if (d_count == 0) {

		  if (ratio > ratio_max)
		    ratio_max = ratio;

		  if (ratio != 0.0 && ratio < ratio_min)
		    ratio_min = ratio;

		  tot_tests++;
		}
	      }			/* incw */
	    }			/* incy */
	  }			/* incx */
	}			/* tests */
      }				/* norm */
    }				/* prec */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && (bad_ratios > 0))) {
    printf("      %s:  n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, n, ntests, thresh);
    if (ratio_min == 1.0e+308)
      ratio_min = 0.0;

    /* every case may have been skipped at random; avoid printing 0/0 */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  blas_free(x);
  blas_free(y);
  blas_free(w);
  blas_free(head_w_true);
  blas_free(tail_w_true);
  blas_free(x_gen);
  blas_free(y_gen);
  blas_free(temp_ab);
  blas_free(temp_xy);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  FPU_FIX_STOP;
  return ratio_max;
}				/* end of do_test_zwaxpby_d_d_x */

/*
 * Running totals for a single BLAS routine, accumulated over every
 * vector size n that main() exercises.
 */
struct waxpby_totals {
  double max_ratio;		/* largest ratio returned for any n */
  int bad_ratios;		/* sum of the per-n bad ratio counts */
  int tests;			/* sum of the per-n test counts */
};

/*
 * Fold the result of one do_test_* call (one vector size) into the
 * per-routine totals.
 */
static void waxpby_totals_add(struct waxpby_totals *totals,
			      double max_ratio, int bad_ratios, int tests)
{
  if (max_ratio > totals->max_ratio)
    totals->max_ratio = max_ratio;

  totals->bad_ratios += bad_ratios;
  totals->tests += tests;
}

/*
 * Print the PASS/FAIL line for one routine.
 *
 * Returns 1 if the routine recorded at least one bad ratio, 0 otherwise,
 * so the caller can add the result to its failed-routine counter.
 */
static int waxpby_totals_report(const char *fname,
				const struct waxpby_totals *totals)
{
  int failed = (totals->bad_ratios != 0);

  printf("%s", failed ? "FAIL> " : "PASS> ");
  printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
	 fname, totals->bad_ratios, totals->tests, totals->max_ratio);

  return failed;
}

/* Print the command line synopsis shown when the argument count is wrong. */
static void print_waxpby_usage(void)
{
  printf("Usage:\n");
  printf("do_test_waxpby <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
  printf("   <nsizes>: number of sizes to be run.\n");
  printf
    ("   <ntests>: the number of tests performed for each set of attributes\n");
  printf
    ("   <thresh>: to catch bad ratios if it is greater than <thresh>\n");
  printf("    <debug>: 0, 1, 2, or 3; \n");
  printf("        if 0, no printing \n");
  printf("        if 1, print error summary only if tests fail\n");
  printf("        if 2, print error summary for each n\n");
  printf("        if 3, print complete info each test fails \n");
  printf("<test_prob>: probability of preforming a given \n");
  printf("           test case: 0.0 does no tests, 1.0 does all tests\n");
}

/*
 * Run one routine's tester for n = 0 .. nsizes and print its result line.
 *
 * This is a macro rather than a function taking a function pointer so
 * that every tester is still invoked through a direct call: the argument
 * conversions are then governed by that tester's own prototype and no
 * common pointer type has to be assumed for all of them.
 *
 * The expansion uses these locals of main(): nsizes, ntests, seed, thresh,
 * debug, test_prob, size_min_ratio, num_bad_ratio, num_tests, nr_routines
 * and nr_failed_routines.
 */
#define RUN_WAXPBY_ROUTINE(test_fn, routine_name)                          \
  do {                                                                     \
    struct waxpby_totals totals_ = { 0.0, 0, 0 };                          \
    double size_max_ratio_;                                                \
    int n_;                                                                \
                                                                           \
    printf("Testing %s...\n", (routine_name));                             \
    for (n_ = 0; n_ <= nsizes; n_++) {                                     \
      size_max_ratio_ =                                                    \
        test_fn(n_, ntests, &seed, thresh, debug, test_prob,               \
                &size_min_ratio, &num_bad_ratio, &num_tests);              \
      waxpby_totals_add(&totals_, size_max_ratio_,                         \
                        num_bad_ratio, num_tests);                         \
    }                                                                      \
                                                                           \
    nr_routines++;                                                         \
    nr_failed_routines += waxpby_totals_report((routine_name), &totals_);  \
  } while (0)

/*
 * Test driver for the waxpby family.
 *
 * Command line: <nsizes> <ntests> <thresh> <debug> <test_prob>
 *
 * Each routine is tested for every vector size n in 0 .. nsizes.  One
 * PASS/FAIL line is printed per routine, followed by a final
 * PASSED/FAILED line giving the number of failed routines out of the
 * number of routines run.
 *
 * Returns -1 after printing the usage text when the argument count is
 * wrong, and 0 otherwise (test failures are reported on stdout only).
 */
int main(int argc, char **argv)
{
  const char *base_routine = "waxpby";
  int nsizes, ntests, debug;
  double thresh, test_prob;
  int seed = 1999;
  int nr_failed_routines = 0, nr_routines = 0;

  /* Output slots written by the do_test_* routines. */
  double size_min_ratio = 0.0;
  int num_bad_ratio = 0, num_tests = 0;

  if (argc != 6) {
    print_waxpby_usage();
    return -1;
  }

  nsizes = atoi(argv[1]);
  ntests = atoi(argv[2]);
  thresh = atof(argv[3]);
  debug = atoi(argv[4]);
  test_prob = atof(argv[5]);

  if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
    BLAS_error("Testing waxpby", 0, 0, NULL);

  printf("Testing %s...\n", base_routine);
  printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
	 nsizes, ntests, thresh, debug);

  /* Routines with uniform argument precision. */
  RUN_WAXPBY_ROUTINE(do_test_swaxpby, "BLAS_swaxpby");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby, "BLAS_dwaxpby");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby, "BLAS_cwaxpby");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby, "BLAS_zwaxpby");

  /* Mixed-type routines. */
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_d_s, "BLAS_dwaxpby_d_s");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_s_d, "BLAS_dwaxpby_s_d");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_s_s, "BLAS_dwaxpby_s_s");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_z_c, "BLAS_zwaxpby_z_c");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_c_z, "BLAS_zwaxpby_c_z");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_c_c, "BLAS_zwaxpby_c_c");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_c_s, "BLAS_cwaxpby_c_s");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_s_c, "BLAS_cwaxpby_s_c");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_s_s, "BLAS_cwaxpby_s_s");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_z_d, "BLAS_zwaxpby_z_d");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_d_z, "BLAS_zwaxpby_d_z");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_d_d, "BLAS_zwaxpby_d_d");

  /* The _x variants of the routines above, in the same order. */
  RUN_WAXPBY_ROUTINE(do_test_swaxpby_x, "BLAS_swaxpby_x");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_x, "BLAS_dwaxpby_x");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_x, "BLAS_cwaxpby_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_x, "BLAS_zwaxpby_x");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_d_s_x, "BLAS_dwaxpby_d_s_x");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_s_d_x, "BLAS_dwaxpby_s_d_x");
  RUN_WAXPBY_ROUTINE(do_test_dwaxpby_s_s_x, "BLAS_dwaxpby_s_s_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_z_c_x, "BLAS_zwaxpby_z_c_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_c_z_x, "BLAS_zwaxpby_c_z_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_c_c_x, "BLAS_zwaxpby_c_c_x");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_c_s_x, "BLAS_cwaxpby_c_s_x");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_s_c_x, "BLAS_cwaxpby_s_c_x");
  RUN_WAXPBY_ROUTINE(do_test_cwaxpby_s_s_x, "BLAS_cwaxpby_s_s_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_z_d_x, "BLAS_zwaxpby_z_d_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_d_z_x, "BLAS_zwaxpby_d_z_x");
  RUN_WAXPBY_ROUTINE(do_test_zwaxpby_d_d_x, "BLAS_zwaxpby_d_d_x");

  printf("\n");
  if (nr_failed_routines)
    printf("FAILED ");
  else
    printf("PASSED ");
  printf("%-10s: FAIL/TOTAL = %d/%d\n",
	 base_routine, nr_failed_routines, nr_routines);

  return 0;
}

#undef RUN_WAXPBY_ROUTINE

