#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "blas_extended.h"
#include "blas_extended_private.h"
#include "blas_extended_test.h"

double do_test_sgemv2(int m, int n, int ntests, int *seed, double thresh,
		      int debug, float test_prob, double *min_ratio,
		      int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator used in testgen().
 *
 * thresh    (input) double
 *           When the ratio returned from test() exceeds the specified
 *           threshold, the current size, r_true, r_comp, and ratio will be
 *           printed.  (Since ratio is supposed to be O(1), we can set thresh
 *           to ~10.)
 *
 * debug     (input) int
 *           If debug=3, print summary 
 *           If debug=2, print summary only if the number of bad ratios > 0
 *           If debug=1, print complete info if tests fail
 *           If debug=0, return max ratio
 *
 * test_prob (input) float
 *           The specified test will be performed only if the generated 
 *           random exceeds this threshold.
 *
 * min_ratio (output) double
 *           The minimum ratio
 * 
 * num_bad_ratio (output) int
 *               The number of tests fail; they are above the threshold.
 *
 * num_tests (output) int
 *           The number of tests is being performed.
 *
 * Return value
 * ============
 *
 * The maximum ratio if run successfully, otherwise return -1 
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is one, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- if debug is zero, the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random

 *          norm loop   -- varying norm: near undeflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying uplo type: upper or lower
 *                  lda loop      -- varying lda: m, m+1, 2m 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_sgemv2";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* total number of tests to be done */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* a temporary variable for calculating ratio */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  float alpha;
  float beta;
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_sgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_sgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* use for counting the number of testgen calls * 2 */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=1, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=n, n+1, or 2n */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_sgemv2_testgen(norm, order_type, trans_type, m, n,
					&alpha, alpha_flag, A, lda,
					head_x_gen, tail_x_gen, &beta,
					beta_flag, y_gen, seed, head_r_true,
					tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;


		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;


		      scopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_sgemv2 */
		      FPU_FIX_STOP;
		      BLAS_sgemv2(order_type, trans_type, m, n, alpha, A, lda,
				  head_x, tail_x, incx_val, beta, y,
				  incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_sdot2(n_i, blas_no_conj, alpha, beta,
					  y_gen[k], y[iy], head_r_true[k],
					  tail_r_true[k], temp, 1, head_x,
					  tail_x, incx_val, eps_int, un_int,
					  &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  sprint_vector(head_x, n_i, incx_val, "head_x");
			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
			  sprint_vector(y_gen, m_i, 1, "y_gen");
			  sprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("%16.8e", alpha);
			  printf("\n      ");
			  printf("beta = ");
			  printf("%16.8e", beta);
			  printf("\n");
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("[%24.16e, %24.16e]", head_r_true[j],
				   tail_r_true[j]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2(int m, int n, int ntests, int *seed, double thresh,
		      int debug, float test_prob, double *min_ratio,
		      int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator used in testgen().
 *
 * thresh    (input) double
 *           When the ratio returned from test() exceeds the specified
 *           threshold, the current size, r_true, r_comp, and ratio will be
 *           printed.  (Since ratio is supposed to be O(1), we can set thresh
 *           to ~10.)
 *
 * debug     (input) int
 *           If debug=3, print summary 
 *           If debug=2, print summary only if the number of bad ratios > 0
 *           If debug=1, print complete info if tests fail
 *           If debug=0, return max ratio
 *
 * test_prob (input) float
 *           The specified test will be performed only if the generated 
 *           random exceeds this threshold.
 *
 * min_ratio (output) double
 *           The minimum ratio
 * 
 * num_bad_ratio (output) int
 *               The number of tests fail; they are above the threshold.
 *
 * num_tests (output) int
 *           The number of tests is being performed.
 *
 * Return value
 * ============
 *
 * The maximum ratio if run successfully, otherwise return -1 
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is one, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- if debug is zero, the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random

 *          norm loop   -- varying norm: near undeflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying uplo type: upper or lower
 *                  lda loop      -- varying lda: m, m+1, 2m 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dgemv2";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* total number of tests to be done */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* a temporary variable for calculating ratio */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_dgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_dgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* use for counting the number of testgen calls * 2 */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=1, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=n, n+1, or 2n */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_dgemv2_testgen(norm, order_type, trans_type, m, n,
					&alpha, alpha_flag, A, lda,
					head_x_gen, tail_x_gen, &beta,
					beta_flag, y_gen, seed, head_r_true,
					tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;


		    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;


		      dcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_dgemv2 */
		      FPU_FIX_STOP;
		      BLAS_dgemv2(order_type, trans_type, m, n, alpha, A, lda,
				  head_x, tail_x, incx_val, beta, y,
				  incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_ddot2(n_i, blas_no_conj, alpha, beta,
					  y_gen[k], y[iy], head_r_true[k],
					  tail_r_true[k], temp, 1, head_x,
					  tail_x, incx_val, eps_int, un_int,
					  &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  dprint_vector(head_x, n_i, incx_val, "head_x");
			  dprint_vector(tail_x, n_i, incx_val, "tail_x");
			  dprint_vector(y_gen, m_i, 1, "y_gen");
			  dprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("%24.16e", alpha);
			  printf("\n      ");
			  printf("beta = ");
			  printf("%24.16e", beta);
			  printf("\n");
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("[%24.16e, %24.16e]", head_r_true[j],
				   tail_r_true[j]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2(int m, int n, int ntests, int *seed, double thresh,
		      int debug, float test_prob, double *min_ratio,
		      int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2 (BLAS_cgemv2; all operands are held in
 * float arrays, two floats per element).
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is passed to
 *           xrand() and to BLAS_cgemv2_testgen().
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN).  (Since ratio is supposed to be O(1),
 *           we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole test space twice: the first pass
 *                       finds the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       larger than 0.5 * max ratio
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing (apart from the early-exit messages)
 *
 * test_prob (input) float
 *           A generated case is run only if the random number returned by
 *           xrand(seed) is less than test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  It is left
 *           at 1e308 if no nonzero ratio was seen, and is 0.0 when
 *           m == 0 or n == 0.
 * 
 * num_bad_ratio (output) int
 *               The number of tests that fail (ratio not <= thresh),
 *               counted over the last pass that was run.
 *
 * num_tests (output) int
 *           The number of tests performed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio found in the first pass.  Returns 0.0 immediately
 * (without running anything) when m == 0 or n == 0.
 *
 * Testing stops early, and the function returns what it has gathered so
 * far, once MAX_BAD_TESTS bad ratios were counted in a pass or as soon as
 * a ratio is not <= TOTAL_FAILURE_THRESHOLD.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2";

  /* bound on the debug reports; the check below is p_count <= max_print,
     so up to max_print + 1 reports can be printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug reports printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected: power(2, -BITS_S) */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: number of generated test cases so far;
				   old_count: value of count at the last report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; nothing is allocated yet, so a
     plain return is enough */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must regenerate exactly the cases of the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_val == 2 matches no case: alpha is not set here and
         alpha_flag stays 0 */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* beta_val == 2 matches no case: beta is not set here and
	   beta_flag stays 0 */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length of y and n_i the length of x */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i + 1, or 2 * m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  /* skip leading dimensions that are too small for the
		     storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_cgemv2_testgen(norm, order_type, trans_type, m, n,
					&alpha, alpha_flag, A, lda,
					head_x_gen, tail_x_gen, &beta,
					beta_flag, y_gen, seed, head_r_true,
					tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx (doubled: stride counted in floats) */
		    incx = incx_val;
		    incx *= 2;

		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy (doubled: stride counted in floats) */
		      incy = incy_val;
		      incy *= 2;

		      ccopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_cgemv2 */
		      FPU_FIX_STOP;
		      BLAS_cgemv2(order_type, trans_type, m, n, alpha, A, lda,
				  head_x, tail_x, incx_val, beta, y,
				  incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio: each entry of y is checked
		         as a dot product of one row of A with x */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_cdot2(n_i, blas_no_conj, alpha, beta,
					  &y_gen[k], &y[iy], &head_r_true[k],
					  &tail_r_true[k], temp, 1, head_x,
					  tail_x, incx_val, eps_int, un_int,
					  &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  cprint_vector(head_x, n_i, incx_val, "head_x");
			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
			  cprint_vector(y_gen, m_i, 1, "y_gen");
			  cprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			  printf("\n");
			  /* j walks r_true in steps of incy_gen, k indexes
			     the per-row ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    /* tot_tests is 0 when every case was skipped (ntests == 0 or all
       cases thrown out by test_prob); report 0.00 rather than 0/0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2(int m, int n, int ntests, int *seed, double thresh,
		      int debug, float test_prob, double *min_ratio,
		      int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2 (BLAS_zgemv2; all operands are held in
 * double arrays, two doubles per element).
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is passed to
 *           xrand() and to BLAS_zgemv2_testgen().
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN).  (Since ratio is supposed to be O(1),
 *           we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole test space twice: the first pass
 *                       finds the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       larger than 0.5 * max ratio
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing (apart from the early-exit messages)
 *
 * test_prob (input) float
 *           A generated case is run only if the random number returned by
 *           xrand(seed) is less than test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  It is left
 *           at 1e308 if no nonzero ratio was seen, and is 0.0 when
 *           m == 0 or n == 0.
 * 
 * num_bad_ratio (output) int
 *               The number of tests that fail (ratio not <= thresh),
 *               counted over the last pass that was run.
 *
 * num_tests (output) int
 *           The number of tests performed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio found in the first pass.  Returns 0.0 immediately
 * (without running anything) when m == 0 or n == 0.
 *
 * Testing stops early, and the function returns what it has gathered so
 * far, once MAX_BAD_TESTS bad ratios were counted in a pass or as soon as
 * a ratio is not <= TOTAL_FAILURE_THRESHOLD.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2";

  /* bound on the debug reports; the check below is p_count <= max_print,
     so up to max_print + 1 reports can be printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug reports printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected: power(2, -BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: number of generated test cases so far;
				   old_count: value of count at the last report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; nothing is allocated yet, so a
     plain return is enough */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
			   2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must regenerate exactly the cases of the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_val == 2 matches no case: alpha is not set here and
         alpha_flag stays 0 */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* beta_val == 2 matches no case: beta is not set here and
	   beta_flag stays 0 */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length of y and n_i the length of x */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i + 1, or 2 * m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  /* skip leading dimensions that are too small for the
		     storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_zgemv2_testgen(norm, order_type, trans_type, m, n,
					&alpha, alpha_flag, A, lda,
					head_x_gen, tail_x_gen, &beta,
					beta_flag, y_gen, seed, head_r_true,
					tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx (doubled: stride counted in doubles) */
		    incx = incx_val;
		    incx *= 2;

		    zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy (doubled: stride counted in doubles) */
		      incy = incy_val;
		      incy *= 2;

		      zcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_zgemv2 */
		      FPU_FIX_STOP;
		      BLAS_zgemv2(order_type, trans_type, m, n, alpha, A, lda,
				  head_x, tail_x, incx_val, beta, y,
				  incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio: each entry of y is checked
		         as a dot product of one row of A with x */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  zge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_zdot2(n_i, blas_no_conj, alpha, beta,
					  &y_gen[k], &y[iy], &head_r_true[k],
					  &tail_r_true[k], temp, 1, head_x,
					  tail_x, incx_val, eps_int, un_int,
					  &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  zge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  zprint_vector(head_x, n_i, incx_val, "head_x");
			  zprint_vector(tail_x, n_i, incx_val, "tail_x");
			  zprint_vector(y_gen, m_i, 1, "y_gen");
			  zprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			  printf("\n");
			  /* j walks r_true in steps of incy_gen, k indexes
			     the per-row ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    /* tot_tests is 0 when every case was skipped (ntests == 0 or all
       cases thrown out by test_prob); report 0.00 rather than 0/0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_d_s(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator used in testgen().
 *
 * thresh    (input) double
 *           When the ratio returned from test() exceeds the specified
 *           threshold, the current size, r_true, r_comp, and ratio will be
 *           printed.  (Since ratio is supposed to be O(1), we can set thresh
 *           to ~10.)
 *
 * debug     (input) int
 *           If debug=3, print summary 
 *           If debug=2, print summary only if the number of bad ratios > 0
 *           If debug=1, print complete info if tests fail
 *           If debug=0, return max ratio
 *
 * test_prob (input) float
 *           The specified test will be performed only if the generated 
 *           random exceeds this threshold.
 *
 * min_ratio (output) double
 *           The minimum ratio
 * 
 * num_bad_ratio (output) int
 *               The number of tests fail; they are above the threshold.
 *
 * num_tests (output) int
 *           The number of tests is being performed.
 *
 * Return value
 * ============
 *
 * The maximum ratio if run successfully, otherwise return -1 
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is one, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- if debug is zero, the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random

 *          norm loop   -- varying norm: near undeflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying uplo type: upper or lower
 *                  lda loop      -- varying lda: m, m+1, 2m 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dgemv2_d_s";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* total number of tests to be done */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* a temporary variable for calculating ratio */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  double *A;
  float *head_x;
  float *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_dgemv2_d_s_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_dgemv2_d_s_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* use for counting the number of testgen calls * 2 */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=1, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=n, n+1, or 2n */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_dgemv2_d_s_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;


		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;


		      dcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_dgemv2_d_s */
		      FPU_FIX_STOP;
		      BLAS_dgemv2_d_s(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha, beta,
					      y_gen[k], y[iy], head_r_true[k],
					      tail_r_true[k], temp, 1, head_x,
					      tail_x, incx_val, eps_int,
					      un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  sprint_vector(head_x, n_i, incx_val, "head_x");
			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
			  dprint_vector(y_gen, m_i, 1, "y_gen");
			  dprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("%24.16e", alpha);
			  printf("\n      ");
			  printf("beta = ");
			  printf("%24.16e", beta);
			  printf("\n");
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("[%24.16e, %24.16e]", head_r_true[j],
				   tail_r_true[j]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_s_d(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dgemv2_s_d (GEMV2 with a float matrix A,
 * double vectors head_x/tail_x, a double vector y and double alpha/beta).
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is passed to
 *           xrand() and to BLAS_dgemv2_s_d_testgen().  When debug == 3 it
 *           is reset to its value on entry before the second pass.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since ratio is supposed
 *           to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run all cases twice: the first pass finds the
 *                       maximum ratio, the second pass prints complete
 *                       information for failing cases whose ratio is
 *                       > 0.5 * that maximum.  No summary line is printed.
 *           If debug=2, print the summary line
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0
 *           Otherwise,  neither the summary nor per-case reports are printed
 *
 * test_prob (input) float
 *           A configuration is run only when the value returned by
 *           xrand(seed) is < test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass
 *           (left at 1e308 if none was recorded).  Set to 0.0 when
 *           m == 0 or n == 0.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio was not <= thresh,
 *               as counted in the last pass executed.
 *
 * num_tests (output) int
 *           The number of ratios recorded during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass.  Returns 0.0
 * immediately when m == 0 or n == 0.
 *
 * Testing stops early, after printing a message, when the number of bad
 * ratios reaches MAX_BAD_TESTS or when a ratio is not
 * <= TOTAL_FAILURE_THRESHOLD; the values gathered so far are still
 * reported through the output arguments.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- the above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dgemv2_s_d";

  /* Bound on detailed failure reports.  The report guard below tests
     (p_count <= max_print), so at most max_print + 1 reports are printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterates through the repeated tests */
  int j, k;			/* multipurpose counters */
  int iy;			/* index into y */
  int incx_val, incy_val;	/* loop variables over the strides */
  int incx, incy;		/* current strides of x and y */
  int incy_gen;			/* stride of the generated data: 1 (real case) */
  int d_count;			/* pass counter of the debug loop */
  int find_max_ratio;		/* 1 only if debug == 3 (two passes) */
  int p_count;			/* number of failure reports printed so far */
  int tot_tests;		/* number of ratios recorded in the first pass */
  int norm;			/* -1/0/1: near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double *ratios;		/* per-row ratios of the current case */
  double ratio;			/* the ratio of the current case */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon: 2^(-BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  float *A;
  double *head_x;
  double *tail_x;
  double *y;
  float *temp;			/* holds one row of A for the ratio test */

  /* x_gen and y_gen store the vectors generated by testgen.
     They are copied into x and y with the stride under test. */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_dgemv2_s_d_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_dgemv2_s_d_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* the seed on entry */
  int count, old_count;		/* testgen call counters, used to print at
				   most one report per generated problem */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; no memory has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;

  /* get space for calculation; the strided vectors get twice the length
     because the largest stride magnitude tested is 2 */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* These values do not depend on any loop variable, so they are
     computed once instead of once per beta iteration. */
  eps_int = power(2, -BITS_D);
  un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
  prec = blas_prec_double;

  /* The debug iteration:
     If debug=3, the iteration is executed twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    /* replay the same random sequence in the reporting pass */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag == 1: alpha is fixed here; alpha_flag == 0: alpha is
         left for testgen, which receives its address */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      default:
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as alpha_flag */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	default:
	  break;
	}

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length of y and n_i the length of x */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i+1, or 2*m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }

		  /* skip leading dimensions too small for this storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_dgemv2_s_d_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;

		    /* spread the generated x with the stride under test */
		    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;

		      /* y is overwritten by the routine, so it is reloaded
		         from y_gen for every call */
		      dcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_dgemv2_s_d */
		      FPU_FIX_STOP;
		      BLAS_dgemv2_s_d(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio: each entry of y is checked as
		         a dot product of one row of A with x */
		      if (m > 0 && n > 0) {
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha, beta,
					      y_gen[k], y[iy], head_r_true[k],
					      tail_r_true[k], temp, 1, head_x,
					      tail_x, incx_val, eps_int,
					      un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}
		      }

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is
							   different from the
							   current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  dprint_vector(head_x, n_i, incx_val, "head_x");
			  dprint_vector(tail_x, n_i, incx_val, "tail_x");
			  dprint_vector(y_gen, m_i, 1, "y_gen");
			  dprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("%24.16e", alpha);
			  printf("\n      ");
			  printf("beta = ");
			  printf("%24.16e", beta);
			  printf("\n");
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("[%24.16e, %24.16e]", head_r_true[j],
				   tail_r_true[j]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }

		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* no test may have been recorded (e.g. ntests == 0 or every case
       thrown out); report a fraction of 0 rather than dividing by zero */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_s_s(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2 (routine under test: BLAS_dgemv2_s_s).
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *           If m or n is zero the function returns 0.0 immediately and
 *           no test is run.
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is passed to
 *           xrand() and to the test generator.  When debug=3 it is reset
 *           to its entry value before the second pass so that both passes
 *           see the same sequence of cases.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           Behaviour as implemented below:
 *           If debug=3, run all cases twice; the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       > 0.5 * max ratio found by the first pass
 *           If debug=2, print a one-block summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing
 *           The "too many failures" and "flagrant ratio" messages are
 *           printed regardless of debug.
 *
 * test_prob (input) float
 *           A generated case is run only if the random number drawn for
 *           it is strictly less than test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass.
 *           It is 1e308 if no nonzero ratio was seen, and 0.0 when the
 *           function returns early because m or n is zero.
 * 
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the
 *               threshold) in the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio found during the first pass (0.0 if m or n is zero).
 * If the run is aborted because of too many failures or a flagrant
 * ratio, the maximum found so far is returned.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        norm loop   -- varying norm: near underflow, near one, or 
 *                      -- near overflow
 *          numtest loop  -- how many times the test is perform with 
 *                          -- above set of attributes
 *            order loop   -- varying order type: rowmajor or colmajor
 *              trans loop    -- varying trans type: no_trans, trans,
 *                              -- or conj_trans
 *                lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                  incx loop     -- varying incx: -2, -1, 1, 2
 *                    incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dgemv2_s_s";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests performed in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double bad_frac;		/* bad_ratios / tot_tests for the summary line */
  double eps_int;		/* the internal epsilon expected, power(2, -BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  float *A;
  float *head_x;
  float *tail_x;
  double *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is bumped once per generated case;
				   old_count is the case last reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* Nothing to test for an empty matrix.  This return happens before
     FPU_FIX_START and before any allocation, so there is nothing to
     undo. */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag stays 0 for alpha_val == 2; alpha is then passed by
         address to testgen (presumably filled in there -- the generator
         is not visible here) */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as alpha_flag above */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length used for y and n_i the length used
	         for x below */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=m_i, m_i+1, or 2*m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }

		  /* skip leading dimensions that are too small for the
		     m-by-n matrix in the chosen storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random.
		     The draw is made only for cases that passed the lda
		     check above, so the order of these two tests fixes the
		     random sequence. */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen
		     (m and n are both nonzero here because of the early
		     return above) */
		  if (m > 0 && n > 0)
		    BLAS_dgemv2_s_s_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;


		    /* spread the unit-stride generated x into the strided
		       work vectors */
		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;


		      /* reload y from y_gen before every call, since y is
		         passed to the routine under test and read back
		         afterwards */
		      dcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_dgemv2_s_s */
		      FPU_FIX_STOP;
		      BLAS_dgemv2_s_s(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index: for a negative stride the
		         walk starts at offset (m_i - 1) * |incy| */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio, one row at a time:
		         j indexes ratios, k indexes the unit-stride
		         generated data, iy indexes the strided y */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha, beta,
					      y_gen[k], y[iy], head_r_true[k],
					      tail_r_true[k], temp, 1, head_x,
					      tail_x, incx_val, eps_int,
					      un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			/* NOTE(review): p_count starts at 0 and the test is
			   "<=", so up to max_print + 1 reports can be
			   printed -- confirm whether that is intended. */
			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  sprint_vector(head_x, n_i, incx_val, "head_x");
			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
			  dprint_vector(y_gen, m_i, 1, "y_gen");
			  dprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("%24.16e", alpha);
			  printf("\n      ");
			  printf("beta = ");
			  printf("%24.16e", beta);
			  printf("\n");

			  /* j steps through r_true, k through ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("[%24.16e, %24.16e]", head_r_true[j],
				   tail_r_true[j]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }

		      /* statistics are gathered on the first pass only, so
		         the debug=3 replay does not count tests twice */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* tot_tests is zero when ntests is zero or every case was thrown
       out at random; do not divide 0 by 0 in that case */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  /* common exit, also reached by the abort paths above (which skip the
     summary) */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_z_c(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2 (routine under test: BLAS_zgemv2_z_c).
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *           If m or n is zero the function returns 0.0 immediately and
 *           no test is run.
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is passed to
 *           xrand() and to the test generator.  When debug=3 it is reset
 *           to its entry value before the second pass so that both passes
 *           see the same sequence of cases.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           Behaviour as implemented below:
 *           If debug=3, run all cases twice; the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       > 0.5 * max ratio found by the first pass
 *           If debug=2, print a one-block summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing
 *           The "too many failures" and "flagrant ratio" messages are
 *           printed regardless of debug.
 *
 * test_prob (input) float
 *           A generated case is run only if the random number drawn for
 *           it is strictly less than test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass.
 *           It is 1e308 if no nonzero ratio was seen, and 0.0 when the
 *           function returns early because m or n is zero.
 * 
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the
 *               threshold) in the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio found during the first pass (0.0 if m or n is zero).
 * If the run is aborted because of too many failures or a flagrant
 * ratio, the maximum found so far is returned.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        norm loop   -- varying norm: near underflow, near one, or 
 *                      -- near overflow
 *          numtest loop  -- how many times the test is perform with 
 *                          -- above set of attributes
 *            order loop   -- varying order type: rowmajor or colmajor
 *              trans loop    -- varying trans type: no_trans, trans,
 *                              -- or conj_trans
 *                lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                  incx loop     -- varying incx: -2, -1, 1, 2
 *                    incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_z_c";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests performed in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double bad_frac;		/* bad_ratios / tot_tests for the summary line */
  double eps_int;		/* the internal epsilon expected, power(2, -BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];		/* complex scalar: [0] and [1] printed as a pair */
  double beta[2];		/* complex scalar: [0] and [1] printed as a pair */
  double *A;
  float *head_x;
  float *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is bumped once per generated case;
				   old_count is the case last reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* Nothing to test for an empty matrix.  This return happens before
     FPU_FIX_START and before any allocation, so there is nothing to
     undo. */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* each generated element occupies two consecutive reals */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
			   2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag stays 0 for alpha_val == 2; alpha is then passed by
         address to testgen (presumably filled in there -- the generator
         is not visible here) */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as alpha_flag above */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length used for y and n_i the length used
	         for x below */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=m_i, m_i+1, or 2*m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }

		  /* skip leading dimensions that are too small for the
		     m-by-n matrix in the chosen storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random.
		     The draw is made only for cases that passed the lda
		     check above, so the order of these two tests fixes the
		     random sequence. */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen
		     (m and n are both nonzero here because of the early
		     return above) */
		  if (m > 0 && n > 0)
		    BLAS_zgemv2_z_c_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx; the doubled value is only used in the
		       failure report, the routines below get incx_val */
		    incx = incx_val;
		    incx *= 2;

		    /* spread the unit-stride generated x into the strided
		       work vectors */
		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy; the doubled value is the step used
		         to index the double array y below */
		      incy = incy_val;
		      incy *= 2;

		      /* reload y from y_gen before every call, since y is
		         passed to the routine under test and read back
		         afterwards */
		      zcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_zgemv2_z_c */
		      FPU_FIX_STOP;
		      BLAS_zgemv2_z_c(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index: for a negative stride the
		         walk starts at offset (m_i - 1) * |incy| */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio, one row at a time:
		         j indexes ratios, k indexes the unit-stride
		         generated data, iy indexes the strided y */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  zge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			/* NOTE(review): p_count starts at 0 and the test is
			   "<=", so up to max_print + 1 reports can be
			   printed -- confirm whether that is intended. */
			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  zge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  cprint_vector(head_x, n_i, incx_val, "head_x");
			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
			  zprint_vector(y_gen, m_i, 1, "y_gen");
			  zprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			  printf("\n");

			  /* j steps through r_true two reals at a time,
			     k through ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }

		      /* statistics are gathered on the first pass only, so
		         the debug=3 replay does not count tests twice */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* tot_tests is zero when ntests is zero or every case was thrown
       out at random; do not divide 0 by 0 in that case */
    bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

  /* common exit, also reached by the abort paths above (which skip the
     summary) */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_c_z(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_c_z.  For every generated case
 * the routine is called and each entry of the computed y is checked by
 * test_BLAS_zdot2_c_z against the double-double reference produced by
 * BLAS_zgemv2_c_z_testgen.  The ratio of a case is the largest of its
 * per-entry ratios; a NaN in any entry makes the ratio of the case NaN.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is handed to
 *           xrand() and to the test generator.
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since the ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice; the second pass prints
 *                       complete info for failing cases whose ratio
 *                       exceeds 0.5 * max ratio
 *           If debug=2, always print the summary
 *           If debug=1, print the summary only if the number of
 *                       bad ratios > 0
 *           If debug=0, print neither summary nor per-case info
 *
 * test_prob (input) float
 *           A case is run only if xrand(seed) < test_prob; otherwise
 *           the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio recorded in the first pass
 *           (left at 1e308 if no nonzero ratio was recorded).
 * 
 * num_bad_ratio (output) int
 *               The number of cases whose ratio is not <= thresh,
 *               as counted in the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of cases counted in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass.  When m == 0 or n == 0
 * the function returns 0.0 immediately with all three outputs set to zero.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second (last) pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_c_z";

  /* bound on the number of failing cases printed in debug mode.
     NOTE(review): the check below is p_count <= max_print, so up to
     max_print + 1 cases are actually printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failing cases printed */
  int tot_tests;		/* total number of cases counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum (nonzero) ratio */
  double *ratios;		/* per-entry ratios of the current case */
  double ratio;			/* the ratio of the current case */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon handed to the dot test;
				   set to power(2, -BITS_D) below */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  float *A;
  double *head_x;
  double *tail_x;
  double *y;
  float *temp;			/* holds one row of A for calculating the ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: number of generated cases so far;
				   old_count: the case that was printed last */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; the outputs were zeroed above */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y_gen and r_true hold complex values: two doubles per entry */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation.  x and y get twice the room of their
     generated counterparts because they are filled with |inc| up to 2 */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must replay exactly the same random cases */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha; for alpha_val == 2 the flag stays 0 and alpha is
       left for the test generator to fill in */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta; same convention as alpha */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans; m_i is the length of y
	         and n_i the length of x for the chosen operation */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i+1, or 2*m_i; values too small for the
		   chosen storage order are skipped below */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_zgemv2_c_z_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx (in units of doubles; only printed) */
		    incx = incx_val;
		    incx *= 2;

		    zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy (in units of doubles, used to step iy) */
		      incy = incy_val;
		      incy *= 2;

		      /* y is overwritten by the routine, so refresh it
		         from y_gen for every call */
		      zcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_zgemv2_c_z */
		      FPU_FIX_STOP;
		      BLAS_zgemv2_c_z(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio.
			     The !<= picks up a NaN in ratios[j]; the
			     ratio == ratio test (false only for NaN)
			     keeps a NaN from an earlier row from being
			     overwritten by a later, finite ratio. */
			  if (j == 0) {
			    ratio = ratios[0];
			  } else if (ratio == ratio
				     && !(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  zprint_vector(head_x, n_i, incx_val, "head_x");
			  zprint_vector(tail_x, n_i, incx_val, "tail_x");
			  zprint_vector(y_gen, m_i, 1, "y_gen");
			  zprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			  printf("\n");
			  /* j walks the (real, imag) pairs of r_true,
			     k the matching per-entry ratio */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* every case may have been skipped; do not divide by zero then */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  /* common exit: also reached by the early aborts above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_c_c(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_c_c.  For every generated case
 * the routine is called and each entry of the computed y is checked by
 * test_BLAS_zdot2_c_c against the double-double reference produced by
 * BLAS_zgemv2_c_c_testgen.  The ratio of a case is the largest of its
 * per-entry ratios; a NaN in any entry makes the ratio of the case NaN.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is handed to
 *           xrand() and to the test generator.
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since the ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice; the second pass prints
 *                       complete info for failing cases whose ratio
 *                       exceeds 0.5 * max ratio
 *           If debug=2, always print the summary
 *           If debug=1, print the summary only if the number of
 *                       bad ratios > 0
 *           If debug=0, print neither summary nor per-case info
 *
 * test_prob (input) float
 *           A case is run only if xrand(seed) < test_prob; otherwise
 *           the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio recorded in the first pass
 *           (left at 1e308 if no nonzero ratio was recorded).
 * 
 * num_bad_ratio (output) int
 *               The number of cases whose ratio is not <= thresh,
 *               as counted in the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of cases counted in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded in the first pass.  When m == 0 or n == 0
 * the function returns 0.0 immediately with all three outputs set to zero.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second (last) pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_c_c";

  /* bound on the number of failing cases printed in debug mode.
     NOTE(review): the check below is p_count <= max_print, so up to
     max_print + 1 cases are actually printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failing cases printed */
  int tot_tests;		/* total number of cases counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum (nonzero) ratio */
  double *ratios;		/* per-entry ratios of the current case */
  double ratio;			/* the ratio of the current case */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon handed to the dot test;
				   set to power(2, -BITS_D) below */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  double *y;
  float *temp;			/* holds one row of A for calculating the ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: number of generated cases so far;
				   old_count: the case that was printed last */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; the outputs were zeroed above */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y_gen and r_true hold complex values: two doubles per entry */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation.  x and y get twice the room of their
     generated counterparts because they are filled with |inc| up to 2 */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must replay exactly the same random cases */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha; for alpha_val == 2 the flag stays 0 and alpha is
       left for the test generator to fill in */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta; same convention as alpha */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans; m_i is the length of y
	         and n_i the length of x for the chosen operation */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i+1, or 2*m_i; values too small for the
		   chosen storage order are skipped below */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_zgemv2_c_c_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx (in units of floats; only printed) */
		    incx = incx_val;
		    incx *= 2;

		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy (in units of doubles, used to step iy) */
		      incy = incy_val;
		      incy *= 2;

		      /* y is overwritten by the routine, so refresh it
		         from y_gen for every call */
		      zcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_zgemv2_c_c */
		      FPU_FIX_STOP;
		      BLAS_zgemv2_c_c(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio.
			     The !<= picks up a NaN in ratios[j]; the
			     ratio == ratio test (false only for NaN)
			     keeps a NaN from an earlier row from being
			     overwritten by a later, finite ratio. */
			  if (j == 0) {
			    ratio = ratios[0];
			  } else if (ratio == ratio
				     && !(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  cprint_vector(head_x, n_i, incx_val, "head_x");
			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
			  zprint_vector(y_gen, m_i, 1, "y_gen");
			  zprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			  printf("\n");
			  /* j walks the (real, imag) pairs of r_true,
			     k the matching per-entry ratio */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* every case may have been skipped; do not divide by zero then */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  /* common exit: also reached by the early aborts above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_c_s(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2 (routine BLAS_cgemv2_c_s).  In this
 * variant A, y, alpha and beta are handled with the complex single
 * helpers (cge_*, ccopy_vector, cprint_vector) while head_x and tail_x
 * are handled with the real single helpers (scopy_vector, sprint_vector).
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is passed to
 *           xrand() and to the testgen routine.  When debug=3 it is reset
 *           to its value on entry before the second pass, so that both
 *           passes see the same sequence of cases.
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run every case twice: the first pass finds the
 *                       max ratio, the second pass prints a detailed
 *                       "FAIL>" report for bad cases whose ratio exceeds
 *                       0.5 * max ratio.  No summary line is printed.
 *           If debug=2, always print the summary line
 *           If debug=1, print the summary line only if the number of
 *                       bad ratios > 0
 *           If debug=0, print nothing unless testing is aborted
 *
 * test_prob (input) float
 *           A generated case is run only if xrand(seed) < test_prob;
 *           otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  It is 0.0 when m == 0 or
 *           n == 0, and keeps its initial value 1e308 when no nonzero
 *           ratio was recorded.
 * 
 * num_bad_ratio (output) int
 *               The number of cases in the last pass whose ratio is not
 *               <= thresh.
 *
 * num_tests (output) int
 *           The number of cases counted during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass; 0.0 when m == 0 or
 * n == 0.  When testing is aborted (MAX_BAD_TESTS bad cases reached, or a
 * ratio that is not <= TOTAL_FAILURE_THRESHOLD) the maximum recorded so
 * far is returned.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_c_s";

  /* Cap on detailed failure reports.  Note that the check below is
     p_count <= max_print, so up to max_print + 1 reports are printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* stride, in floats, between entries of y_gen */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug reports printed */
  int tot_tests;		/* total number of cases counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current case */
  double ratio;			/* the max over ratios[] for the current case */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected, 2^(-BITS_S) */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* holds one row of A while computing its ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is bumped once per generated case;
				   old_count is the case last reported, so
				   each case is reported at most once */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; outputs keep the values set above */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=m_i, m_i+1, or 2*m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  /* skip leading dimensions too small for the chosen order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_cgemv2_c_s_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;


		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;
		      incy *= 2;

		      ccopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_cgemv2_c_s */
		      FPU_FIX_STOP;
		      BLAS_cgemv2_c_s(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  sprint_vector(head_x, n_i, incx_val, "head_x");
			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
			  cprint_vector(y_gen, m_i, 1, "y_gen");
			  cprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			  printf("\n");
			  /* j walks the interleaved (re, im) entries of
			     r_true, k walks the per-row ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered during the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* Every case may have been skipped (ntests == 0, or test_prob so small
       that xrand() never fell below it); report 0.00 rather than 0/0. */
    double bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_s_c(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on GEMV2 (routine BLAS_cgemv2_s_c).  In this
 * variant A is handled with the real single helpers (sge_copy_row,
 * sge_print_matrix) while head_x, tail_x, y, alpha and beta are handled
 * with the complex single helpers (ccopy_vector, cprint_vector).
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is passed to
 *           xrand() and to the testgen routine.  When debug=3 it is reset
 *           to its value on entry before the second pass, so that both
 *           passes see the same sequence of cases.
 *
 * thresh    (input) double
 *           A case is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio therefore counts as bad).  Since ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run every case twice: the first pass finds the
 *                       max ratio, the second pass prints a detailed
 *                       "FAIL>" report for bad cases whose ratio exceeds
 *                       0.5 * max ratio.  No summary line is printed.
 *           If debug=2, always print the summary line
 *           If debug=1, print the summary line only if the number of
 *                       bad ratios > 0
 *           If debug=0, print nothing unless testing is aborted
 *
 * test_prob (input) float
 *           A generated case is run only if xrand(seed) < test_prob;
 *           otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen.  It is 0.0 when m == 0 or
 *           n == 0, and keeps its initial value 1e308 when no nonzero
 *           ratio was recorded.
 * 
 * num_bad_ratio (output) int
 *               The number of cases in the last pass whose ratio is not
 *               <= thresh.
 *
 * num_tests (output) int
 *           The number of cases counted during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass; 0.0 when m == 0 or
 * n == 0.  When testing is aborted (MAX_BAD_TESTS bad cases reached, or a
 * ratio that is not <= TOTAL_FAILURE_THRESHOLD) the maximum recorded so
 * far is returned.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_s_c";

  /* Cap on detailed failure reports.  Note that the check below is
     p_count <= max_print, so up to max_print + 1 reports are printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* stride, in floats, between entries of y_gen */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug reports printed */
  int tot_tests;		/* total number of cases counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current case */
  double ratio;			/* the max over ratios[] for the current case */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected, 2^(-BITS_S) */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* holds one row of A while computing its ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count is bumped once per generated case;
				   old_count is the case last reported, so
				   each case is reported at most once */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; outputs keep the values set above */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_S);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	prec = blas_prec_single;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda=m_i, m_i+1, or 2*m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  /* skip leading dimensions too small for the chosen order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* in the trivial cases, no need to run testgen */
		  if (m > 0 && n > 0)
		    BLAS_cgemv2_s_c_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;
		    incx *= 2;

		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;
		      incy *= 2;

		      ccopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_cgemv2_s_c */
		      FPU_FIX_STOP;
		      BLAS_cgemv2_s_c(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below causes NaN error to be detected.
			       Note that (NaN > thresh) is always false. */
			  } else if (!(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&
			    (p_count <= max_print) &&
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  cprint_vector(head_x, n_i, incx_val, "head_x");
			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
			  cprint_vector(y_gen, m_i, 1, "y_gen");
			  cprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			  printf("\n");
			  /* j walks the interleaved (re, im) entries of
			     r_true, k walks the per-row ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered during the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* Every case may have been skipped (ntests == 0, or test_prob so small
       that xrand() never fell below it); report 0.00 rather than 0/0. */
    double bad_frac = 0.0;
    if (tot_tests > 0)
      bad_frac = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_frac, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_s_s(int m, int n, int ntests, int *seed, double thresh,
                          int debug, float test_prob, double *min_ratio,
                          int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cgemv2_s_s: A, head_x and tail_x are
 * stored as plain float arrays, while alpha, beta and y are handled as
 * interleaved (real, imaginary) float pairs.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           xrand() and by the testgen routine.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (this includes a NaN ratio).  Since the ratio is supposed to
 *           be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass finds the
 *                       max ratio, the second pass prints complete info for
 *                       failing cases whose ratio > 0.5 * max ratio
 *                       (at most max_print reports)
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing, just return the max ratio
 *
 * test_prob (input) float
 *           A generated case is executed only if the random number drawn
 *           for it is below this value; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass.  It is
 *           0.0 when m == 0 or n == 0, and stays at 1e308 when no nonzero
 *           ratio was recorded.
 *
 * num_bad_ratio (output) int
 *               The number of failed cases (ratio not <= thresh) counted
 *               during the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of cases executed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen during the first pass (0.0 when m == 0 or
 * n == 0).  Invalid arguments (negative m, n or ntests) are reported
 * through BLAS_error().
 *
 * When the sweep is aborted early (too many failures, or a ratio above
 * TOTAL_FAILURE_THRESHOLD), the case that triggered the abort is counted in
 * num_bad_ratio but is not folded into the max/min ratio or num_tests, and
 * the summary is not printed.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3 (see above), once otherwise
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        norm loop   -- varying norm: near underflow, near one, or
 *                    -- near overflow
 *          numtest loop  -- how many times the test is performed with
 *                        -- the above set of attributes
 *            order loop   -- varying order type: rowmajor or colmajor
 *              trans loop    -- varying trans type: no_trans, trans,
 *                            -- or conj_trans
 *                lda loop      -- varying lda: m_i, m_i+1, 2*m_i, where
 *                              -- m_i is the length of y (m for no_trans,
 *                              -- n otherwise); values too small for the
 *                              -- m-by-n matrix are skipped
 *                  incx loop     -- varying incx: -2, -1, 1, 2
 *                    incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_s_s";

  /* max number of detailed failure reports to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;                        /* iterate through the repeating tests */
  int j, k;                     /* multipurpose counters or variables */
  int iy;                       /* use to index y */
  int incx_val, incy_val,       /* for testing different inc values */
    incx, incy;
  int incy_gen;                 /* stride of y_gen / r_true in floats: 2 here */
  int d_count;                  /* counter for debug */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of detailed failure reports printed */
  int tot_tests;                /* total number of tests done in the first pass */
  int norm;                     /* input values of near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum ratio */
  double *ratios;               /* per-row ratios of the current case */
  double ratio;                 /* the ratio of the current case (max over rows) */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected: power(2, -BITS_S) */
  double un_int;                /* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;                  /* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;               /* input flag for BLAS_cgemv2_s_s_testgen */
  int beta_val;
  int beta_flag;                /* input flag for BLAS_cgemv2_s_s_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;                   /* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count = number of generated cases so far;
                                   old_count = value of count at the last
                                   detailed report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty problem; outputs keep their initial values */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* recounted from scratch in every pass */

    /* the second pass must replay exactly the same random cases */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag = 1 marks alpha as fixed here (0 or 1); with
         alpha_flag = 0 (alpha_val == 2) alpha is not assigned in this
         function and is handed to testgen by address */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
        alpha[0] = alpha[1] = 0.0;
        alpha_flag = 1;
        break;
      case 1:
        alpha[0] = 1.0;
        alpha[1] = 0.0;
        alpha_flag = 1;
        break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
        /* same convention as alpha_flag */
        beta_flag = 0;
        switch (beta_val) {
        case 0:
          beta[0] = beta[1] = 0.0;
          beta_flag = 1;
          break;
        case 1:
          beta[0] = 1.0;
          beta[1] = 0.0;
          beta_flag = 1;
          break;
        }


        eps_int = power(2, -BITS_S);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
        prec = blas_prec_single;

        /* values near underflow, 1, or overflow */
        for (norm = -1; norm <= 1; norm++) {

          /* number of tests */
          for (i = 0; i < ntests; i++) {

            /* row or col major */
            for (order_val = 0; order_val < 2; order_val++) {
              switch (order_val) {
              case 0:
                order_type = blas_rowmajor;
                break;
              case 1:
              default:
                order_type = blas_colmajor;
                break;
              }

              /* no_trans, trans, or conj_trans */
              for (trans_val = 0; trans_val < 3; trans_val++) {
                /* m_i is the length of y, n_i the length of x */
                switch (trans_val) {
                case 0:
                  trans_type = blas_no_trans;
                  m_i = m;
                  n_i = n;
                  break;
                case 1:
                  trans_type = blas_trans;
                  m_i = n;
                  n_i = m;
                  break;
                case 2:
                default:
                  trans_type = blas_conj_trans;
                  m_i = n;
                  n_i = m;
                  break;
                }

                /* lda=m_i, m_i+1, or 2*m_i */
                for (lda_val = 0; lda_val < 3; lda_val++) {
                  switch (lda_val) {
                  case 0:
                    lda = m_i;
                    break;
                  case 1:
                    lda = m_i + 1;
                    break;
                  case 2:
                  default:
                    lda = 2 * m_i;
                    break;
                  }
                  /* skip leading dimensions too small for the m-by-n A */
                  if ((order_type == blas_rowmajor && lda < n) ||
                      (order_type == blas_colmajor && lda < m))
                    continue;

                  /* For the sake of speed, we throw out this case at random */
                  if (xrand(seed) >= test_prob)
                    continue;

                  /* in the trivial cases, no need to run testgen */
                  if (m > 0 && n > 0)
                    BLAS_cgemv2_s_s_testgen(norm, order_type, trans_type, m,
                                            n, &alpha, alpha_flag, A, lda,
                                            head_x_gen, tail_x_gen, &beta,
                                            beta_flag, y_gen, seed,
                                            head_r_true, tail_r_true);

                  count++;

                  /* varying incx */
                  for (incx_val = -2; incx_val <= 2; incx_val++) {
                    if (incx_val == 0)
                      continue;

                    /* setting incx */
                    incx = incx_val;


                    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
                    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

                    /* varying incy */
                    for (incy_val = -2; incy_val <= 2; incy_val++) {
                      if (incy_val == 0)
                        continue;

                      /* setting incy: incy is the stride of y counted in
                         floats (two per element), incy_val the stride
                         passed to the BLAS routine */
                      incy = incy_val;
                      incy *= 2;

                      ccopy_vector(y_gen, m_i, 1, y, incy_val);

                      /* call BLAS_cgemv2_s_s */
                      FPU_FIX_STOP;
                      BLAS_cgemv2_s_s(order_type, trans_type, m, n, alpha, A,
                                      lda, head_x, tail_x, incx_val, beta, y,
                                      incy_val);
                      FPU_FIX_START;

                      /* set y starting index */
                      iy = 0;
                      if (incy < 0)
                        iy = -(m_i - 1) * incy;

                      /* computing the ratio */
                      if (m > 0 && n > 0)
                        for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
                          /* copy row j of A to temp */
                          sge_copy_row(order_type, trans_type, m_i, n_i, A,
                                       lda, temp, j);

                          test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha, beta,
                                              &y_gen[k], &y[iy],
                                              &head_r_true[k],
                                              &tail_r_true[k], temp, 1,
                                              head_x, tail_x, incx_val,
                                              eps_int, un_int, &ratios[j]);

                          /* take the max ratio */
                          if (j == 0) {
                            ratio = ratios[0];
                            /* The !<= below causes NaN error to be detected.
                               Note that (NaN > thresh) is always false. */
                          } else if (!(ratios[j] <= ratio)) {
                            ratio = ratios[j];
                          }
                          iy += incy;
                        }

                      /* Increase the number of bad ratio, if the ratio
                         is bigger than the threshold.
                         The !<= below causes NaN error to be detected.
                         Note that (NaN > thresh) is always false. */
                      if (!(ratio <= thresh)) {
                        bad_ratios++;

                        if ((debug == 3) &&     /* print only when debug is on */
                            (count != old_count) &&     /* print if old vector is different
                                                           from the current one */
                            (d_count == find_max_ratio) &&
                            (p_count < max_print) &&    /* at most max_print reports */
                            (ratio > 0.5 * ratio_max)) {
                          old_count = count;

                          printf
                            ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
                             fname, m, n, ntests, thresh);

                          /* Print test info */
                          switch (prec) {
                          case blas_prec_single:
                            printf("single ");
                            break;
                          case blas_prec_double:
                            printf("double ");
                            break;
                          case blas_prec_indigenous:
                            printf("indigenous ");
                            break;
                          case blas_prec_extra:
                            printf("extra ");
                            break;
                          }
                          switch (norm) {
                          case -1:
                            printf("near_underflow ");
                            break;
                          case 0:
                            printf("near_one ");
                            break;
                          case 1:
                            printf("near_overflow ");
                            break;
                          }
                          switch (order_type) {
                          case blas_rowmajor:
                            printf("row_major ");
                            break;
                          case blas_colmajor:
                            printf("col_major ");
                            break;
                          }
                          switch (trans_type) {
                          case blas_no_trans:
                            printf("no_trans ");
                            break;
                          case blas_trans:
                            printf("trans ");
                            break;
                          case blas_conj_trans:
                            printf("conj_trans ");
                            break;
                          }

                          /* note: incy printed here is the float stride
                             (2 * incy_val) */
                          printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
                                 incy);

                          sge_print_matrix(A, m_i, n_i, lda, order_type, "A");

                          sprint_vector(head_x, n_i, incx_val, "head_x");
                          sprint_vector(tail_x, n_i, incx_val, "tail_x");
                          cprint_vector(y_gen, m_i, 1, "y_gen");
                          cprint_vector(y, m_i, incy_val, "y_final");

                          printf("      ");
                          printf("alpha = ");
                          printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
                          printf("\n      ");
                          printf("beta = ");
                          printf("(%16.8e, %16.8e)", beta[0], beta[1]);
                          printf("\n");
                          /* j walks r_true in (re, im) pairs, k counts rows */
                          for (j = 0, k = 0; j < m_i * incy_gen;
                               j += incy_gen, k++) {
                            printf("      ");
                            printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
                                   head_r_true[j], tail_r_true[j],
                                   head_r_true[j + 1], tail_r_true[j + 1]);
                            printf(", ratio[%d]=%.4e\n", k, ratios[k]);
                          }

                          printf("      ratio=%.4e\n", ratio);
                          p_count++;
                        }
                        if (bad_ratios >= MAX_BAD_TESTS) {
                          printf("\ntoo many failures, exiting....");
                          printf("\nTesting and compilation");
                          printf(" are incomplete\n\n");
                          goto end;
                        }
                        if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
                          printf("\nFlagrant ratio error, exiting...");
                          printf("\nTesting and compilation");
                          printf(" are incomplete\n\n");
                          goto end;
                        }
                      }
                      /* statistics are gathered in the first pass only */
                      if (d_count == 0) {
                        if (ratio > ratio_max)
                          ratio_max = ratio;

                        if (ratio != 0.0 && ratio < ratio_min)
                          ratio_min = ratio;

                        tot_tests++;
                      }
                    }           /* incy */
                  }             /* incx */
                }               /* lda */
              }                 /* trans */
            }                   /* order */
          }                     /* tests */
        }                       /* norm */

      }                         /* beta */
    }                           /* alpha */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* tot_tests is 0 when ntests == 0 or when every case was skipped by
       test_prob; report a failure rate of 0 instead of computing 0/0 */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
           fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_z_d(int m, int n, int ntests, int *seed, double thresh,
                          int debug, float test_prob, double *min_ratio,
                          int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_z_d: head_x and tail_x are stored
 * as plain double arrays, while A, alpha, beta and y are handled as
 * interleaved (real, imaginary) double pairs.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           xrand() and by the testgen routine.
 *
 * thresh    (input) double
 *           A test case is counted as bad when its ratio is not <= thresh
 *           (this includes a NaN ratio).  Since the ratio is supposed to
 *           be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass finds the
 *                       max ratio, the second pass prints complete info for
 *                       failing cases whose ratio > 0.5 * max ratio
 *                       (at most max_print reports)
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing, just return the max ratio
 *
 * test_prob (input) float
 *           A generated case is executed only if the random number drawn
 *           for it is below this value; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass.  It is
 *           0.0 when m == 0 or n == 0, and stays at 1e308 when no nonzero
 *           ratio was recorded.
 *
 * num_bad_ratio (output) int
 *               The number of failed cases (ratio not <= thresh) counted
 *               during the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of cases executed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen during the first pass (0.0 when m == 0 or
 * n == 0).  Invalid arguments (negative m, n or ntests) are reported
 * through BLAS_error().
 *
 * When the sweep is aborted early (too many failures, or a ratio above
 * TOTAL_FAILURE_THRESHOLD), the case that triggered the abort is counted in
 * num_bad_ratio but is not folded into the max/min ratio or num_tests, and
 * the summary is not printed.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- executed twice if debug is 3 (see above), once otherwise
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        norm loop   -- varying norm: near underflow, near one, or
 *                    -- near overflow
 *          numtest loop  -- how many times the test is performed with
 *                        -- the above set of attributes
 *            order loop   -- varying order type: rowmajor or colmajor
 *              trans loop    -- varying trans type: no_trans, trans,
 *                            -- or conj_trans
 *                lda loop      -- varying lda: m_i, m_i+1, 2*m_i, where
 *                              -- m_i is the length of y (m for no_trans,
 *                              -- n otherwise); values too small for the
 *                              -- m-by-n matrix are skipped
 *                  incx loop     -- varying incx: -2, -1, 1, 2
 *                    incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_z_d";

  /* max number of detailed failure reports to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;                        /* iterate through the repeating tests */
  int j, k;                     /* multipurpose counters or variables */
  int iy;                       /* use to index y */
  int incx_val, incy_val,       /* for testing different inc values */
    incx, incy;
  int incy_gen;                 /* stride of y_gen / r_true in doubles: 2 here */
  int d_count;                  /* counter for debug */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of detailed failure reports printed */
  int tot_tests;                /* total number of tests done in the first pass */
  int norm;                     /* input values of near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum ratio */
  double *ratios;               /* per-row ratios of the current case */
  double ratio;                 /* the ratio of the current case (max over rows) */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* the internal epsilon expected: power(2, -BITS_D) */
  double un_int;                /* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;                 /* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;               /* input flag for BLAS_zgemv2_z_d_testgen */
  int beta_val;
  int beta_flag;                /* input flag for BLAS_zgemv2_z_d_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;                   /* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count = number of generated cases so far;
                                   old_count = value of count at the last
                                   detailed report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty problem; outputs keep their initial values */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
                           2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* recounted from scratch in every pass */

    /* the second pass must replay exactly the same random cases */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;       /* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag = 1 marks alpha as fixed here (0 or 1); with
         alpha_flag = 0 (alpha_val == 2) alpha is not assigned in this
         function and is handed to testgen by address */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
        alpha[0] = alpha[1] = 0.0;
        alpha_flag = 1;
        break;
      case 1:
        alpha[0] = 1.0;
        alpha[1] = 0.0;
        alpha_flag = 1;
        break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
        /* same convention as alpha_flag */
        beta_flag = 0;
        switch (beta_val) {
        case 0:
          beta[0] = beta[1] = 0.0;
          beta_flag = 1;
          break;
        case 1:
          beta[0] = 1.0;
          beta[1] = 0.0;
          beta_flag = 1;
          break;
        }


        eps_int = power(2, -BITS_D);
        un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
        prec = blas_prec_double;

        /* values near underflow, 1, or overflow */
        for (norm = -1; norm <= 1; norm++) {

          /* number of tests */
          for (i = 0; i < ntests; i++) {

            /* row or col major */
            for (order_val = 0; order_val < 2; order_val++) {
              switch (order_val) {
              case 0:
                order_type = blas_rowmajor;
                break;
              case 1:
              default:
                order_type = blas_colmajor;
                break;
              }

              /* no_trans, trans, or conj_trans */
              for (trans_val = 0; trans_val < 3; trans_val++) {
                /* m_i is the length of y, n_i the length of x */
                switch (trans_val) {
                case 0:
                  trans_type = blas_no_trans;
                  m_i = m;
                  n_i = n;
                  break;
                case 1:
                  trans_type = blas_trans;
                  m_i = n;
                  n_i = m;
                  break;
                case 2:
                default:
                  trans_type = blas_conj_trans;
                  m_i = n;
                  n_i = m;
                  break;
                }

                /* lda=m_i, m_i+1, or 2*m_i */
                for (lda_val = 0; lda_val < 3; lda_val++) {
                  switch (lda_val) {
                  case 0:
                    lda = m_i;
                    break;
                  case 1:
                    lda = m_i + 1;
                    break;
                  case 2:
                  default:
                    lda = 2 * m_i;
                    break;
                  }
                  /* skip leading dimensions too small for the m-by-n A */
                  if ((order_type == blas_rowmajor && lda < n) ||
                      (order_type == blas_colmajor && lda < m))
                    continue;

                  /* For the sake of speed, we throw out this case at random */
                  if (xrand(seed) >= test_prob)
                    continue;

                  /* in the trivial cases, no need to run testgen */
                  if (m > 0 && n > 0)
                    BLAS_zgemv2_z_d_testgen(norm, order_type, trans_type, m,
                                            n, &alpha, alpha_flag, A, lda,
                                            head_x_gen, tail_x_gen, &beta,
                                            beta_flag, y_gen, seed,
                                            head_r_true, tail_r_true);

                  count++;

                  /* varying incx */
                  for (incx_val = -2; incx_val <= 2; incx_val++) {
                    if (incx_val == 0)
                      continue;

                    /* setting incx */
                    incx = incx_val;


                    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
                    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

                    /* varying incy */
                    for (incy_val = -2; incy_val <= 2; incy_val++) {
                      if (incy_val == 0)
                        continue;

                      /* setting incy: incy is the stride of y counted in
                         doubles (two per element), incy_val the stride
                         passed to the BLAS routine */
                      incy = incy_val;
                      incy *= 2;

                      zcopy_vector(y_gen, m_i, 1, y, incy_val);

                      /* call BLAS_zgemv2_z_d */
                      FPU_FIX_STOP;
                      BLAS_zgemv2_z_d(order_type, trans_type, m, n, alpha, A,
                                      lda, head_x, tail_x, incx_val, beta, y,
                                      incy_val);
                      FPU_FIX_START;

                      /* set y starting index */
                      iy = 0;
                      if (incy < 0)
                        iy = -(m_i - 1) * incy;

                      /* computing the ratio */
                      if (m > 0 && n > 0)
                        for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
                          /* copy row j of A to temp */
                          zge_copy_row(order_type, trans_type, m_i, n_i, A,
                                       lda, temp, j);

                          test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha, beta,
                                              &y_gen[k], &y[iy],
                                              &head_r_true[k],
                                              &tail_r_true[k], temp, 1,
                                              head_x, tail_x, incx_val,
                                              eps_int, un_int, &ratios[j]);

                          /* take the max ratio */
                          if (j == 0) {
                            ratio = ratios[0];
                            /* The !<= below causes NaN error to be detected.
                               Note that (NaN > thresh) is always false. */
                          } else if (!(ratios[j] <= ratio)) {
                            ratio = ratios[j];
                          }
                          iy += incy;
                        }

                      /* Increase the number of bad ratio, if the ratio
                         is bigger than the threshold.
                         The !<= below causes NaN error to be detected.
                         Note that (NaN > thresh) is always false. */
                      if (!(ratio <= thresh)) {
                        bad_ratios++;

                        if ((debug == 3) &&     /* print only when debug is on */
                            (count != old_count) &&     /* print if old vector is different
                                                           from the current one */
                            (d_count == find_max_ratio) &&
                            (p_count < max_print) &&    /* at most max_print reports */
                            (ratio > 0.5 * ratio_max)) {
                          old_count = count;

                          printf
                            ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
                             fname, m, n, ntests, thresh);

                          /* Print test info */
                          switch (prec) {
                          case blas_prec_single:
                            printf("single ");
                            break;
                          case blas_prec_double:
                            printf("double ");
                            break;
                          case blas_prec_indigenous:
                            printf("indigenous ");
                            break;
                          case blas_prec_extra:
                            printf("extra ");
                            break;
                          }
                          switch (norm) {
                          case -1:
                            printf("near_underflow ");
                            break;
                          case 0:
                            printf("near_one ");
                            break;
                          case 1:
                            printf("near_overflow ");
                            break;
                          }
                          switch (order_type) {
                          case blas_rowmajor:
                            printf("row_major ");
                            break;
                          case blas_colmajor:
                            printf("col_major ");
                            break;
                          }
                          switch (trans_type) {
                          case blas_no_trans:
                            printf("no_trans ");
                            break;
                          case blas_trans:
                            printf("trans ");
                            break;
                          case blas_conj_trans:
                            printf("conj_trans ");
                            break;
                          }

                          /* note: incy printed here is the double stride
                             (2 * incy_val) */
                          printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
                                 incy);

                          zge_print_matrix(A, m_i, n_i, lda, order_type, "A");

                          dprint_vector(head_x, n_i, incx_val, "head_x");
                          dprint_vector(tail_x, n_i, incx_val, "tail_x");
                          zprint_vector(y_gen, m_i, 1, "y_gen");
                          zprint_vector(y, m_i, incy_val, "y_final");

                          printf("      ");
                          printf("alpha = ");
                          printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
                          printf("\n      ");
                          printf("beta = ");
                          printf("(%24.16e, %24.16e)", beta[0], beta[1]);
                          printf("\n");
                          /* j walks r_true in (re, im) pairs, k counts rows */
                          for (j = 0, k = 0; j < m_i * incy_gen;
                               j += incy_gen, k++) {
                            printf("      ");
                            printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
                                   head_r_true[j], tail_r_true[j],
                                   head_r_true[j + 1], tail_r_true[j + 1]);
                            printf(", ratio[%d]=%.4e\n", k, ratios[k]);
                          }

                          printf("      ratio=%.4e\n", ratio);
                          p_count++;
                        }
                        if (bad_ratios >= MAX_BAD_TESTS) {
                          printf("\ntoo many failures, exiting....");
                          printf("\nTesting and compilation");
                          printf(" are incomplete\n\n");
                          goto end;
                        }
                        if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
                          printf("\nFlagrant ratio error, exiting...");
                          printf("\nTesting and compilation");
                          printf(" are incomplete\n\n");
                          goto end;
                        }
                      }
                      /* statistics are gathered in the first pass only */
                      if (d_count == 0) {
                        if (ratio > ratio_max)
                          ratio_max = ratio;

                        if (ratio != 0.0 && ratio < ratio_min)
                          ratio_min = ratio;

                        tot_tests++;
                      }
                    }           /* incy */
                  }             /* incx */
                }               /* lda */
              }                 /* trans */
            }                   /* order */
          }                     /* tests */
        }                       /* norm */

      }                         /* beta */
    }                           /* alpha */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* tot_tests is 0 when ntests == 0 or when every case was skipped by
       test_prob; report a failure rate of 0 instead of computing 0/0 */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
           fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_d_z(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_d_z.  Test data is produced by
 * BLAS_zgemv2_d_z_testgen, the routine under test is called for every
 * combination of the attributes listed under "Code structure", and every
 * row of the result is checked with test_BLAS_zdot2_d_z.  The ratio of
 * one test is the largest per-row ratio, or NaN if any row gave NaN.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is handed to
 *           xrand() and to testgen().  When debug=3 it is reset to its
 *           value on entry before the second pass (presumably so that
 *           the second pass replays the same cases).
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio is therefore bad as well).  Since the ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run all tests twice: the first pass finds the
 *                       max ratio, the second pass prints complete info
 *                       for bad tests whose ratio > 0.5 * max ratio
 *                       (at most max_print reports)
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing, only return the results
 *
 * test_prob (input) float
 *           An (order, trans, lda) case is run only if the generated
 *           random number is below test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  It is
 *           1e308 if no such ratio was seen, and 0.0 if m or n is 0.
 * 
 * num_bad_ratio (output) int
 *               The number of bad tests (see thresh) counted in the
 *               last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass.  Returns 0.0 at once when
 * m or n is 0.  If testing is cut short (too many failures, or a ratio
 * that is not <= TOTAL_FAILURE_THRESHOLD), the values collected so far
 * are returned.  Negative m, n or ntests is reported via BLAS_error.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to testgen (alpha_flag=0)
 *      beta loop   -- varying beta: 0, 1, or left to testgen (beta_flag=0)
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans, or
 *                                -- conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_d_z";

  /* max number of debug reports to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* inc values scaled by 2 (complex storage) */
  int incy_gen;			/* stride of y_gen/r_true in doubles; 2 here */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug reports printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double *ratios;		/* the per-row ratios of the current test */
  double ratio;			/* the ratio of the current test */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected: power(2, -BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* holds one row of A while calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: number of generated cases so far;
				   old_count: value of count at the last
				   debug report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; the outputs keep the values
     assigned above */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag = 1 marks alpha as fixed here; for alpha_val = 2 the
         flag stays 0 and alpha is handed to testgen as it is */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as for alpha */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length of y, n_i the length of x */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i + 1, or 2 * m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  /* skip a leading dimension that is too small for the
		     m by n matrix in this storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* m and n are both positive here (the empty case returned
		     early), so testgen always runs */
		  if (m > 0 && n > 0)
		    BLAS_zgemv2_d_z_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx */
		    incx = incx_val;
		    incx *= 2;

		    zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;
		      incy *= 2;

		      zcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_zgemv2_d_z, bracketed by FPU_FIX_STOP and
		         FPU_FIX_START */
		      FPU_FIX_STOP;
		      BLAS_zgemv2_d_z(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below lets a NaN row ratio replace
			       the running max.  (ratio == ratio) is false
			       only when ratio is already NaN; without that
			       guard the next finite row would overwrite
			       the NaN and the error would go unnoticed. */
			  } else if (ratio == ratio
				     && !(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&	/* second pass only */
			    (p_count < max_print) &&	/* at most max_print reports */
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  zprint_vector(head_x, n_i, incx_val, "head_x");
			  zprint_vector(tail_x, n_i, incx_val, "tail_x");
			  zprint_vector(y_gen, m_i, 1, "y_gen");
			  zprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			  printf("\n");
			  /* j indexes r_true (two doubles per entry),
			     k indexes the per-row ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			/* a NaN ratio also ends up here */
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    /* tot_tests is 0 when every case was skipped; report 0.00 then
       instead of dividing by zero */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  /* common exit: also reached by goto when testing is cut short */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_d_d(int m, int n, int ntests, int *seed, double thresh,
			  int debug, float test_prob, double *min_ratio,
			  int *num_bad_ratio, int *num_tests)

/*
 * Purpose  
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_d_d.  Test data is produced by
 * BLAS_zgemv2_d_d_testgen, the routine under test is called for every
 * combination of the attributes listed under "Code structure", and every
 * row of the result is checked with test_BLAS_zdot2_d_d.  The ratio of
 * one test is the largest per-row ratio, or NaN if any row gave NaN.
 *
 * Arguments
 * =========
 *  
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int         
 *           The seed for the random number generator; it is handed to
 *           xrand() and to testgen().  When debug=3 it is reset to its
 *           value on entry before the second pass (presumably so that
 *           the second pass replays the same cases).
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (a NaN ratio is therefore bad as well).  Since the ratio is
 *           supposed to be O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run all tests twice: the first pass finds the
 *                       max ratio, the second pass prints complete info
 *                       for bad tests whose ratio > 0.5 * max ratio
 *                       (at most max_print reports)
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing, only return the results
 *
 * test_prob (input) float
 *           An (order, trans, lda) case is run only if the generated
 *           random number is below test_prob; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen in the first pass.  It is
 *           1e308 if no such ratio was seen, and 0.0 if m or n is 0.
 * 
 * num_bad_ratio (output) int
 *               The number of bad tests (see thresh) counted in the
 *               last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed in the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen in the first pass.  Returns 0.0 at once when
 * m or n is 0.  If testing is cut short (too many failures, or a ratio
 * that is not <= TOTAL_FAILURE_THRESHOLD), the values collected so far
 * are returned.  Negative m, n or ntests is reported via BLAS_error.
 *
 * Code structure
 * ==============
 * 
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to testgen (alpha_flag=0)
 *      beta loop   -- varying beta: 0, 1, or left to testgen (beta_flag=0)
 *          norm loop   -- varying norm: near underflow, near one, or 
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with 
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans, or
 *                                -- conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i 
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_d_d";

  /* max number of debug reports to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* incx = incx_val; incy = 2 * incy_val */
  int incy_gen;			/* stride of y_gen/r_true in doubles; 2 here */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug reports printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double *ratios;		/* the per-row ratios of the current test */
  double ratio;			/* the ratio of the current test */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected: power(2, -BITS_D) */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* holds one row of A while calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
  int order_val;
  enum blas_order_type order_type;

  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: number of generated cases so far;
				   old_count: value of count at the last
				   debug report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; the outputs keep the values
     assigned above */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag = 1 marks alpha as fixed here; for alpha_val = 2 the
         flag stays 0 and alpha is handed to testgen as it is */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as for alpha */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	eps_int = power(2, -BITS_D);
	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	prec = blas_prec_double;

	/* values near underflow, 1, or overflow */
	for (norm = -1; norm <= 1; norm++) {

	  /* number of tests */
	  for (i = 0; i < ntests; i++) {

	    /* row or col major */
	    for (order_val = 0; order_val < 2; order_val++) {
	      switch (order_val) {
	      case 0:
		order_type = blas_rowmajor;
		break;
	      case 1:
	      default:
		order_type = blas_colmajor;
		break;
	      }

	      /* no_trans, trans, or conj_trans;
	         m_i is the length of y, n_i the length of x */
	      for (trans_val = 0; trans_val < 3; trans_val++) {
		switch (trans_val) {
		case 0:
		  trans_type = blas_no_trans;
		  m_i = m;
		  n_i = n;
		  break;
		case 1:
		  trans_type = blas_trans;
		  m_i = n;
		  n_i = m;
		  break;
		case 2:
		default:
		  trans_type = blas_conj_trans;
		  m_i = n;
		  n_i = m;
		  break;
		}

		/* lda = m_i, m_i + 1, or 2 * m_i */
		for (lda_val = 0; lda_val < 3; lda_val++) {
		  switch (lda_val) {
		  case 0:
		    lda = m_i;
		    break;
		  case 1:
		    lda = m_i + 1;
		    break;
		  case 2:
		  default:
		    lda = 2 * m_i;
		    break;
		  }
		  /* skip a leading dimension that is too small for the
		     m by n matrix in this storage order */
		  if ((order_type == blas_rowmajor && lda < n) ||
		      (order_type == blas_colmajor && lda < m))
		    continue;

		  /* For the sake of speed, we throw out this case at random */
		  if (xrand(seed) >= test_prob)
		    continue;

		  /* m and n are both positive here (the empty case returned
		     early), so testgen always runs */
		  if (m > 0 && n > 0)
		    BLAS_zgemv2_d_d_testgen(norm, order_type, trans_type, m,
					    n, &alpha, alpha_flag, A, lda,
					    head_x_gen, tail_x_gen, &beta,
					    beta_flag, y_gen, seed,
					    head_r_true, tail_r_true);

		  count++;

		  /* varying incx */
		  for (incx_val = -2; incx_val <= 2; incx_val++) {
		    if (incx_val == 0)
		      continue;

		    /* setting incx (not doubled: x is copied with
		       dcopy_vector) */
		    incx = incx_val;


		    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		    /* varying incy */
		    for (incy_val = -2; incy_val <= 2; incy_val++) {
		      if (incy_val == 0)
			continue;

		      /* setting incy */
		      incy = incy_val;
		      incy *= 2;

		      zcopy_vector(y_gen, m_i, 1, y, incy_val);

		      /* call BLAS_zgemv2_d_d, bracketed by FPU_FIX_STOP and
		         FPU_FIX_START */
		      FPU_FIX_STOP;
		      BLAS_zgemv2_d_d(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val);
		      FPU_FIX_START;

		      /* set y starting index */
		      iy = 0;
		      if (incy < 0)
			iy = -(m_i - 1) * incy;

		      /* computing the ratio */
		      if (m > 0 && n > 0)
			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			  /* copy row j of A to temp */
			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
				       lda, temp, j);

			  test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha, beta,
					      &y_gen[k], &y[iy],
					      &head_r_true[k],
					      &tail_r_true[k], temp, 1,
					      head_x, tail_x, incx_val,
					      eps_int, un_int, &ratios[j]);

			  /* take the max ratio */
			  if (j == 0) {
			    ratio = ratios[0];
			    /* The !<= below lets a NaN row ratio replace
			       the running max.  (ratio == ratio) is false
			       only when ratio is already NaN; without that
			       guard the next finite row would overwrite
			       the NaN and the error would go unnoticed. */
			  } else if (ratio == ratio
				     && !(ratios[j] <= ratio)) {
			    ratio = ratios[j];
			  }
			  iy += incy;
			}

		      /* Increase the number of bad ratio, if the ratio
		         is bigger than the threshold.
		         The !<= below causes NaN error to be detected.
		         Note that (NaN > thresh) is always false. */
		      if (!(ratio <= thresh)) {
			bad_ratios++;

			if ((debug == 3) &&	/* print only when debug is on */
			    (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			    (d_count == find_max_ratio) &&	/* second pass only */
			    (p_count < max_print) &&	/* at most max_print reports */
			    (ratio > 0.5 * ratio_max)) {
			  old_count = count;

			  printf
			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			     fname, m, n, ntests, thresh);

			  /* Print test info */
			  switch (prec) {
			  case blas_prec_single:
			    printf("single ");
			    break;
			  case blas_prec_double:
			    printf("double ");
			    break;
			  case blas_prec_indigenous:
			    printf("indigenous ");
			    break;
			  case blas_prec_extra:
			    printf("extra ");
			    break;
			  }
			  switch (norm) {
			  case -1:
			    printf("near_underflow ");
			    break;
			  case 0:
			    printf("near_one ");
			    break;
			  case 1:
			    printf("near_overflow ");
			    break;
			  }
			  switch (order_type) {
			  case blas_rowmajor:
			    printf("row_major ");
			    break;
			  case blas_colmajor:
			    printf("col_major ");
			    break;
			  }
			  switch (trans_type) {
			  case blas_no_trans:
			    printf("no_trans ");
			    break;
			  case blas_trans:
			    printf("trans ");
			    break;
			  case blas_conj_trans:
			    printf("conj_trans ");
			    break;
			  }

			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				 incy);

			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");

			  dprint_vector(head_x, n_i, incx_val, "head_x");
			  dprint_vector(tail_x, n_i, incx_val, "tail_x");
			  zprint_vector(y_gen, m_i, 1, "y_gen");
			  zprint_vector(y, m_i, incy_val, "y_final");

			  printf("      ");
			  printf("alpha = ");
			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			  printf("\n      ");
			  printf("beta = ");
			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			  printf("\n");
			  /* j indexes r_true (two doubles per entry),
			     k indexes the per-row ratios */
			  for (j = 0, k = 0; j < m_i * incy_gen;
			       j += incy_gen, k++) {
			    printf("      ");
			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
				   head_r_true[j], tail_r_true[j],
				   head_r_true[j + 1], tail_r_true[j + 1]);
			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			  }

			  printf("      ratio=%.4e\n", ratio);
			  p_count++;
			}
			if (bad_ratios >= MAX_BAD_TESTS) {
			  printf("\ntoo many failures, exiting....");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
			/* a NaN ratio also ends up here */
			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			  printf("\nFlagrant ratio error, exiting...");
			  printf("\nTesting and compilation");
			  printf(" are incomplete\n\n");
			  goto end;
			}
		      }
		      /* statistics are gathered in the first pass only */
		      if (d_count == 0) {
			if (ratio > ratio_max)
			  ratio_max = ratio;

			if (ratio != 0.0 && ratio < ratio_min)
			  ratio_min = ratio;

			tot_tests++;
		      }
		    }		/* incy */
		  }		/* incx */
		}		/* lda */
	      }			/* trans */
	    }			/* order */
	  }			/* tests */
	}			/* norm */

      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    /* tot_tests is 0 when every case was skipped; report 0.00 then
       instead of dividing by zero */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  /* common exit: also reached by goto when testing is cut short */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_sgemv2_x(int m, int n, int ntests, int *seed, double thresh,
			int debug, float test_prob, double *min_ratio,
			int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_sgemv2_x.  For every combination of
 * attributes listed under "Code structure" a test case is generated with
 * BLAS_sgemv2_testgen, BLAS_sgemv2_x is called on it, and each element of
 * the result is scored with test_BLAS_sdot2 against the head/tail
 * reference values filled in by the generator.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is passed to
 *           xrand() and to testgen().  When debug=3 it is reset to its
 *           value on entry before the second pass.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN).  (Since ratio is supposed to be O(1),
 *           we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole test loop twice: the first pass
 *                       records the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio
 *                       exceeds 0.5 * max ratio (limited by max_print)
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           For any other value nothing is printed at the end.
 *           The "too many failures" and "Flagrant ratio error" messages
 *           are printed regardless of debug.
 *
 * test_prob (input) float
 *           An attribute combination is tested only if xrand(seed) is
 *           below this value; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass.  It is
 *           left at 1e308 if no nonzero ratio was recorded, and is 0.0
 *           when the function returns early because m or n is 0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the threshold
 *               or NaN) in the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed (counted during the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass, or 0.0 if m or n
 * is 0.  When the run bails out early (too many failures or a flagrant
 * ratio), the ratio that triggered the bail-out has not been folded into
 * this maximum; callers should also inspect num_bad_ratio.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to testgen (alpha_flag=0)
 *      beta loop   -- varying beta: 0, 1, or left to testgen (beta_flag=0)
 *        prec loop   -- varying internal prec: single, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_sgemv2_x";

  /* limit on detailed failure reports; the check below is
     p_count <= max_print, so up to max_print + 1 reports are printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* number of tests performed in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon for the selected prec */
  double un_int;		/* the internal underflow threshold */
  float alpha;
  float beta;
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_sgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_sgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far; old_count: value
				   of count at the last printed failure report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; this return happens before any
     allocation, so there is nothing to release */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  /* head_x, tail_x and y hold 2 * max_mn elements because they are
     accessed with strides of up to 2 in magnitude */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_val == 2 matches no case: alpha_flag stays 0 and alpha is
         handed to testgen as is */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same scheme as alpha: beta_val == 2 leaves beta_flag at 0 */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_S);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	    prec = blas_prec_single;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans */
		/* m_i is the number of y elements used below and n_i the
		   number of x elements; they are swapped for the transposed
		   cases */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions smaller than n (row major)
		       or m (column major) */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen
		       (m and n are both nonzero here because of the early
		       return above) */
		    if (m > 0 && n > 0)
		      BLAS_sgemv2_testgen(norm, order_type, trans_type, m, n,
					  &alpha, alpha_flag, A, lda,
					  head_x_gen, tail_x_gen, &beta,
					  beta_flag, y_gen, seed, head_r_true,
					  tail_r_true);

		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;


		      /* spread the unit-stride generated x into the
		         strided work vectors */
		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			incy = incy_val;


			/* reload y from y_gen for every incy because the
			   call below is given y as its output argument */
			scopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_sgemv2_x */
			FPU_FIX_STOP;
			BLAS_sgemv2_x(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val, prec);
			FPU_FIX_START;

			/* set y starting index */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_sdot2(n_i, blas_no_conj, alpha, beta,
					    y_gen[k], y[iy], head_r_true[k],
					    tail_r_true[k], temp, 1, head_x,
					    tail_x, incx_val, eps_int, un_int,
					    &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    sge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    sprint_vector(head_x, n_i, incx_val, "head_x");
			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
			    sprint_vector(y_gen, m_i, 1, "y_gen");
			    sprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("%16.8e", alpha);
			    printf("\n      ");
			    printf("beta = ");
			    printf("%16.8e", beta);
			    printf("\n");
			    /* one line per result element: reference value
			       as [head, tail] plus that element's ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf("[%24.16e, %24.16e]", head_r_true[j],
				     tail_r_true[j]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* bail out of the whole run; note that this jumps
			     past the ratio_max/ratio_min/tot_tests update
			     below for the current test */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary line: always for debug=2, only on failures for debug=1;
     it is skipped when the run bailed out through "end" */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_x(int m, int n, int ntests, int *seed, double thresh,
			int debug, float test_prob, double *min_ratio,
			int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dgemv2_x.  For every combination of
 * attributes listed under "Code structure" a test case is generated with
 * BLAS_dgemv2_testgen, BLAS_dgemv2_x is called on it, and each element of
 * the result is scored with test_BLAS_ddot2 against the head/tail
 * reference values filled in by the generator.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is passed to
 *           xrand() and to testgen().  When debug=3 it is reset to its
 *           value on entry before the second pass.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN).  (Since ratio is supposed to be O(1),
 *           we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole test loop twice: the first pass
 *                       records the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio
 *                       exceeds 0.5 * max ratio (limited by max_print)
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           For any other value nothing is printed at the end.
 *           The "too many failures" and "Flagrant ratio error" messages
 *           are printed regardless of debug.
 *
 * test_prob (input) float
 *           An attribute combination is tested only if xrand(seed) is
 *           below this value; otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass.  It is
 *           left at 1e308 if no nonzero ratio was recorded, and is 0.0
 *           when the function returns early because m or n is 0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the threshold
 *               or NaN) in the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed (counted during the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass, or 0.0 if m or n
 * is 0.  When the run bails out early (too many failures or a flagrant
 * ratio), the ratio that triggered the bail-out has not been folded into
 * this maximum; callers should also inspect num_bad_ratio.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to testgen (alpha_flag=0)
 *      beta loop   -- varying beta: 0, 1, or left to testgen (beta_flag=0)
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dgemv2_x";

  /* limit on detailed failure reports; the check below is
     p_count <= max_print, so up to max_print + 1 reports are printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* number of tests performed in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon for the selected prec */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_dgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_dgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far; old_count: value
				   of count at the last printed failure report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; this return happens before any
     allocation, so there is nothing to release */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  /* head_x, tail_x and y hold 2 * max_mn elements because they are
     accessed with strides of up to 2 in magnitude */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_val == 2 matches no case: alpha_flag stays 0 and alpha is
         handed to testgen as is */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same scheme as alpha: beta_val == 2 leaves beta_flag at 0 */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs */
	/* NOTE(review): cases 0 and 1 are identical, so double precision
	   is exercised twice -- presumably because a single-precision
	   internal computation is not meaningful for double data; confirm */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans */
		/* m_i is the number of y elements used below and n_i the
		   number of x elements; they are swapped for the transposed
		   cases */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions smaller than n (row major)
		       or m (column major) */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen
		       (m and n are both nonzero here because of the early
		       return above) */
		    if (m > 0 && n > 0)
		      BLAS_dgemv2_testgen(norm, order_type, trans_type, m, n,
					  &alpha, alpha_flag, A, lda,
					  head_x_gen, tail_x_gen, &beta,
					  beta_flag, y_gen, seed, head_r_true,
					  tail_r_true);

		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;


		      /* spread the unit-stride generated x into the
		         strided work vectors */
		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			incy = incy_val;


			/* reload y from y_gen for every incy because the
			   call below is given y as its output argument */
			dcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_dgemv2_x */
			FPU_FIX_STOP;
			BLAS_dgemv2_x(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val, prec);
			FPU_FIX_START;

			/* set y starting index */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_ddot2(n_i, blas_no_conj, alpha, beta,
					    y_gen[k], y[iy], head_r_true[k],
					    tail_r_true[k], temp, 1, head_x,
					    tail_x, incx_val, eps_int, un_int,
					    &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    dge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    dprint_vector(head_x, n_i, incx_val, "head_x");
			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
			    dprint_vector(y_gen, m_i, 1, "y_gen");
			    dprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("%24.16e", alpha);
			    printf("\n      ");
			    printf("beta = ");
			    printf("%24.16e", beta);
			    printf("\n");
			    /* one line per result element: reference value
			       as [head, tail] plus that element's ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf("[%24.16e, %24.16e]", head_r_true[j],
				     tail_r_true[j]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* bail out of the whole run; note that this jumps
			     past the ratio_max/ratio_min/tot_tests update
			     below for the current test */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary line: always for debug=2, only on failures for debug=1;
     it is skipped when the run bailed out through "end" */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_x(int m, int n, int ntests, int *seed, double thresh,
			int debug, float test_prob, double *min_ratio,
			int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cgemv2_x.  For every combination of
 * alpha, beta, internal precision, norm, storage order, transpose mode,
 * lda, incx and incy, test data is produced by BLAS_cgemv2_testgen, the
 * routine is called, and each element of the result is checked with
 * test_BLAS_cdot2 against the double-double reference result.  The ratio
 * of a test case is the largest of its per-element ratios.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           xrand() and by BLAS_cgemv2_testgen().
 *
 * thresh    (input) double
 *           A test case counts as bad when its ratio is not <= thresh
 *           (this also catches NaN).  Since the ratio is supposed to be
 *           O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, the whole test sequence is run twice with the same
 *                       seed: the first pass finds the max ratio, the second
 *                       pass prints complete info for failing cases whose
 *                       ratio is > 0.5 * max ratio.  No summary is printed.
 *           If debug=2, print the summary line unconditionally
 *           If debug=1, print the summary only if the number of bad
 *                       ratios is > 0
 *           Otherwise,  print nothing (except the abort messages)
 *
 * test_prob (input) float
 *           A configuration is run only if xrand(seed) < test_prob;
 *           otherwise it is skipped.
 *           NOTE(review): presumably xrand() is uniform on [0,1) -- confirm.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio seen in the first pass.  It stays at
 *           1e308 if no such ratio was seen, and is 0.0 when m or n is 0.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio is above the
 *               threshold (counted in the last pass that was run).
 *
 * num_tests (output) int
 *           The number of test cases performed (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio found in the first pass.  Returns 0.0 immediately when
 * m or n is 0.  If the run is cut short (MAX_BAD_TESTS bad ratios, or a
 * ratio not <= TOTAL_FAILURE_THRESHOLD) a message is printed and the
 * maximum ratio found so far is returned.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: single, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- the above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_x";

  /* bound on detailed failure reports: a report is printed while
     p_count <= max_print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* the same increments times 2 (float units) */
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of tests done (first pass only) */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-element ratios of the current test case */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected--2^(-24) for float */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count = number of generated data sets;
				   old_count = data set of the last report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; the outputs keep the values
     set above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y_gen and r_true hold complex values: 2 reals per element */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must regenerate exactly the data of the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag stays 0 for alpha_val == 2: alpha is not preset here */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* beta_flag stays 0 for beta_val == 2: beta is not preset here */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_S);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	    prec = blas_prec_single;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions that are too small for
		       the chosen storage order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_cgemv2_testgen(norm, order_type, trans_type, m, n,
					  &alpha, alpha_flag, A, lda,
					  head_x_gen, tail_x_gen, &beta,
					  beta_flag, y_gen, seed, head_r_true,
					  tail_r_true);

		    /* one more data set; every incx/incy combination
		       below reuses it */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;
		      incx *= 2;

		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			incy = incy_val;
			incy *= 2;

			/* y is overwritten by the call below, so it is
			   refreshed from y_gen for every case */
			ccopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_cgemv2_x */
			FPU_FIX_STOP;
			BLAS_cgemv2_x(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val, prec);
			FPU_FIX_START;

			/* set y starting index: with a negative increment
			   the first element sits at the far end */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_cdot2(n_i, blas_no_conj, alpha, beta,
					    &y_gen[k], &y[iy],
					    &head_r_true[k], &tail_r_true[k],
					    temp, 1, head_x, tail_x, incx_val,
					    eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    /* note: incx and incy are the doubled values
			       here, not the ones passed to the routine */
			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    cge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    cprint_vector(head_x, n_i, incx_val, "head_x");
			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
			    cprint_vector(y_gen, m_i, 1, "y_gen");
			    cprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			    printf("\n");
			    /* j walks the interleaved (re, im) reference
			       values, k the per-element ratios */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only, so
			   the second (debug=3) pass does not count twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    /* tot_tests is 0 when every configuration was skipped by the
       test_prob filter; report a fraction of 0 instead of dividing
       0.0 by 0.0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  /* common exit, also reached through the two abort paths above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_x(int m, int n, int ntests, int *seed, double thresh,
			int debug, float test_prob, double *min_ratio,
			int *num_bad_ratio, int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_x.  For every combination of
 * alpha, beta, internal precision, norm, storage order, transpose mode,
 * lda, incx and incy, test data is produced by BLAS_zgemv2_testgen, the
 * routine is called, and each element of the result is checked with
 * test_BLAS_zdot2 against the double-double reference result.  The ratio
 * of a test case is the largest of its per-element ratios.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           xrand() and by BLAS_zgemv2_testgen().
 *
 * thresh    (input) double
 *           A test case counts as bad when its ratio is not <= thresh
 *           (this also catches NaN).  Since the ratio is supposed to be
 *           O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, the whole test sequence is run twice with the same
 *                       seed: the first pass finds the max ratio, the second
 *                       pass prints complete info for failing cases whose
 *                       ratio is > 0.5 * max ratio.  No summary is printed.
 *           If debug=2, print the summary line unconditionally
 *           If debug=1, print the summary only if the number of bad
 *                       ratios is > 0
 *           Otherwise,  print nothing (except the abort messages)
 *
 * test_prob (input) float
 *           A configuration is run only if xrand(seed) < test_prob;
 *           otherwise it is skipped.
 *           NOTE(review): presumably xrand() is uniform on [0,1) -- confirm.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio seen in the first pass.  It stays at
 *           1e308 if no such ratio was seen, and is 0.0 when m or n is 0.
 *
 * num_bad_ratio (output) int
 *               The number of test cases whose ratio is above the
 *               threshold (counted in the last pass that was run).
 *
 * num_tests (output) int
 *           The number of test cases performed (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio found in the first pass.  Returns 0.0 immediately when
 * m or n is 0.  If the run is cut short (MAX_BAD_TESTS bad ratios, or a
 * ratio not <= TOTAL_FAILURE_THRESHOLD) a message is printed and the
 * maximum ratio found so far is returned.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: double, double, or extra
 *                      -- (the first two cases both select double)
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- the above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_x";

  /* bound on detailed failure reports: a report is printed while
     p_count <= max_print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* the same increments times 2 (double units) */
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of tests done (first pass only) */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-element ratios of the current test case */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count = number of generated data sets;
				   old_count = data set of the last report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);

  /* nothing to test for an empty matrix; the outputs keep the values
     set above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y_gen and r_true hold complex values: 2 reals per element */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
			   2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must regenerate exactly the data of the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag stays 0 for alpha_val == 2: alpha is not preset here */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* beta_flag stays 0 for beta_val == 2: beta is not preset here */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; cases 0 and 1 both select double
	   internal precision */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions that are too small for
		       the chosen storage order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_testgen(norm, order_type, trans_type, m, n,
					  &alpha, alpha_flag, A, lda,
					  head_x_gen, tail_x_gen, &beta,
					  beta_flag, y_gen, seed, head_r_true,
					  tail_r_true);

		    /* one more data set; every incx/incy combination
		       below reuses it */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;
		      incx *= 2;

		      zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			incy = incy_val;
			incy *= 2;

			/* y is overwritten by the call below, so it is
			   refreshed from y_gen for every case */
			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_x */
			FPU_FIX_STOP;
			BLAS_zgemv2_x(order_type, trans_type, m, n, alpha, A,
				      lda, head_x, tail_x, incx_val, beta, y,
				      incy_val, prec);
			FPU_FIX_START;

			/* set y starting index: with a negative increment
			   the first element sits at the far end */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    zge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2(n_i, blas_no_conj, alpha, beta,
					    &y_gen[k], &y[iy],
					    &head_r_true[k], &tail_r_true[k],
					    temp, 1, head_x, tail_x, incx_val,
					    eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    /* note: incx and incy are the doubled values
			       here, not the ones passed to the routine */
			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    zge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    zprint_vector(head_x, n_i, incx_val, "head_x");
			    zprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* j walks the interleaved (re, im) reference
			       values, k the per-element ratios */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only, so
			   the second (debug=3) pass does not count twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    /* tot_tests is 0 when every configuration was skipped by the
       test_prob filter; report a fraction of 0 instead of dividing
       0.0 by 0.0 */
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests,
       (tot_tests > 0) ? ((double) bad_ratios) / ((double) tot_tests) : 0.0,
       ratio_min, ratio_max);
  }

  /* common exit, also reached through the two abort paths above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_d_s_x(int m, int n, int ntests, int *seed,
                            double thresh, int debug, float test_prob,
                            double *min_ratio, int *num_bad_ratio,
                            int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dgemv2_d_s_x (GEMV2 with a double
 * matrix A, a float head_x/tail_x pair and a double y) and reports the
 * error ratios computed by test_BLAS_ddot2_d_s.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *           Zero is accepted (nothing is run); negative is an error.
 *
 * seed      (input/output) int
 *           The seed for the random number generator used by xrand() and
 *           the test generator.
 *
 * thresh    (input) double
 *           A test counts as "bad" when its ratio is not <= thresh
 *           (this includes NaN ratios).  Since ratio is supposed to be
 *           O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, the whole sweep is run twice: the first pass
 *                       finds the maximum ratio, the second pass (with
 *                       the seed restored) prints full details of failing
 *                       tests whose ratio > 0.5 * max ratio.
 *           If debug=2, print the one-line summary.
 *           If debug=1, print the summary only if there are bad ratios.
 *           Otherwise,  print nothing (except the abort messages).
 *
 * test_prob (input) float
 *           A generated case is executed only if xrand(seed) < test_prob;
 *           otherwise it is skipped for the sake of speed.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen (1e308 if none was seen,
 *           0.0 if m == 0 or n == 0).
 *
 * num_bad_ratio (output) int
 *               The number of tests whose ratio is above the threshold,
 *               counted over the last pass executed.
 *
 * num_tests (output) int
 *           The number of tests performed (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio found in the first pass; 0.0 if m == 0 or n == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to the test generator
 *      beta loop   -- varying beta: 0, 1, or left to the test generator
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- the above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name, used in diagnostics */
  const char fname[] = "BLAS_dgemv2_d_s_x";

  /* limit on detailed failure dumps; the check below is
     p_count <= max_print, so up to max_print + 1 dumps are printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;                        /* iterate through the repeating tests */
  int j, k;                     /* multipurpose counters or variables */
  int iy;                       /* use to index y */
  int incx_val, incy_val,       /* for testing different inc values */
    incx, incy;
  int incy_gen;                 /* stride of y_gen/r_true; 1 in this real case */
  int d_count;                  /* index of the current debug pass */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of detailed failure dumps printed */
  int tot_tests;                /* number of tests executed in the first pass */
  int norm;                     /* input values of near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum nonzero ratio */
  double *ratios;               /* per-row ratios of the current test */
  double ratio;                 /* max (or NaN) over ratios[] of the current test */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* internal epsilon for the selected precision */
  double un_int;                /* the internal underflow threshold */
  double alpha;
  double beta;
  double *A;
  float *head_x;
  float *tail_x;
  double *y;
  double *temp;                 /* holds one row of A while computing a ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they are copied into x and y with the stride under test */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;               /* input flag for BLAS_dgemv2_d_s_testgen */
  int beta_val;
  int beta_flag;                /* input flag for BLAS_dgemv2_d_s_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;                   /* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count: generated cases so far;
                                   old_count: the case last dumped */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0) {
    BLAS_error(fname, 0, 0, NULL);
  }

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3) {
    find_max_ratio = 1;
  }
  max_mn = MAX(m, n);

  /* trivial problem: nothing to test; outputs keep their initial values */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;

  /* get space for calculation; x and y get 2 * max_mn elements so that
     strides of +-2 fit */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, the sweep is executed twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* counted afresh in every pass */

    /* replay exactly the same random cases in the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio)) {
      *seed = saved_seed;
    }

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag = 1 passes a fixed alpha to testgen; with
         alpha_val == 2 the flag stays 0 and alpha is passed by address
         (presumably filled in by testgen -- confirm there) */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
        alpha = 0.0;
        alpha_flag = 1;
        break;
      case 1:
        alpha = 1.0;
        alpha_flag = 1;
        break;
      }

      /* varying beta, same scheme as alpha */
      for (beta_val = 0; beta_val < 3; beta_val++) {
        beta_flag = 0;
        switch (beta_val) {
        case 0:
          beta = 0.0;
          beta_flag = 1;
          break;
        case 1:
          beta = 1.0;
          beta_flag = 1;
          break;
        }

        /* varying extra precs: double is exercised twice, then extra */
        for (prec_val = 0; prec_val <= 2; prec_val++) {
          switch (prec_val) {
          case 0:
          case 1:
            eps_int = power(2, -BITS_D);
            un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                         (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
            prec = blas_prec_double;
            break;
          case 2:
          default:
            eps_int = power(2, -BITS_E);
            un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                         (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
            prec = blas_prec_extra;
            break;
          }

          /* values near underflow, 1, or overflow */
          for (norm = -1; norm <= 1; norm++) {

            /* number of tests */
            for (i = 0; i < ntests; i++) {

              /* row or col major */
              for (order_val = 0; order_val < 2; order_val++) {
                switch (order_val) {
                case 0:
                  order_type = blas_rowmajor;
                  break;
                case 1:
                default:
                  order_type = blas_colmajor;
                  break;
                }

                /* no_trans, trans, or conj_trans; m_i is the length of
                   y and n_i the length of x for the chosen op(A) */
                for (trans_val = 0; trans_val < 3; trans_val++) {
                  switch (trans_val) {
                  case 0:
                    trans_type = blas_no_trans;
                    m_i = m;
                    n_i = n;
                    break;
                  case 1:
                    trans_type = blas_trans;
                    m_i = n;
                    n_i = m;
                    break;
                  case 2:
                  default:
                    trans_type = blas_conj_trans;
                    m_i = n;
                    n_i = m;
                    break;
                  }

                  /* lda = m_i, m_i+1, or 2*m_i */
                  for (lda_val = 0; lda_val < 3; lda_val++) {
                    switch (lda_val) {
                    case 0:
                      lda = m_i;
                      break;
                    case 1:
                      lda = m_i + 1;
                      break;
                    case 2:
                    default:
                      lda = 2 * m_i;
                      break;
                    }

                    /* skip leading dimensions too small for the layout */
                    if ((order_type == blas_rowmajor && lda < n) ||
                        (order_type == blas_colmajor && lda < m)) {
                      continue;
                    }

                    /* For the sake of speed, we throw out this case at random */
                    if (xrand(seed) >= test_prob) {
                      continue;
                    }

                    /* in the trivial cases, no need to run testgen
                       (always true here because of the early return) */
                    if (m > 0 && n > 0) {
                      BLAS_dgemv2_d_s_testgen(norm, order_type, trans_type, m,
                                              n, &alpha, alpha_flag, A, lda,
                                              head_x_gen, tail_x_gen, &beta,
                                              beta_flag, y_gen, seed,
                                              head_r_true, tail_r_true);
                    }

                    count++;

                    /* varying incx */
                    for (incx_val = -2; incx_val <= 2; incx_val++) {
                      if (incx_val == 0) {
                        continue;
                      }

                      /* setting incx */
                      incx = incx_val;

                      /* spread the generated x over the stride under test */
                      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
                      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

                      /* varying incy */
                      for (incy_val = -2; incy_val <= 2; incy_val++) {
                        if (incy_val == 0) {
                          continue;
                        }

                        /* setting incy */
                        incy = incy_val;

                        /* y is overwritten by the routine, so reload it */
                        dcopy_vector(y_gen, m_i, 1, y, incy_val);

                        /* call BLAS_dgemv2_d_s_x with the FPU fix
                           switched off around the routine under test */
                        FPU_FIX_STOP;
                        BLAS_dgemv2_d_s_x(order_type, trans_type, m, n, alpha,
                                          A, lda, head_x, tail_x, incx_val,
                                          beta, y, incy_val, prec);
                        FPU_FIX_START;

                        /* set y starting index: a negative stride
                           starts from the far end of the buffer */
                        iy = 0;
                        if (incy < 0) {
                          iy = -(m_i - 1) * incy;
                        }

                        /* computing the ratio, one dot product per
                           element of y */
                        if (m > 0 && n > 0) {
                          for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
                            /* copy row j of A to temp */
                            dge_copy_row(order_type, trans_type, m_i, n_i, A,
                                         lda, temp, j);

                            test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha,
                                                beta, y_gen[k], y[iy],
                                                head_r_true[k],
                                                tail_r_true[k], temp, 1,
                                                head_x, tail_x, incx_val,
                                                eps_int, un_int, &ratios[j]);

                            /* take the max ratio */
                            if (j == 0) {
                              ratio = ratios[0];
                              /* The !<= below causes NaN error to be detected.
                                 Note that (NaN > thresh) is always false. */
                            } else if (!(ratios[j] <= ratio)) {
                              ratio = ratios[j];
                            }
                            iy += incy;
                          }
                        }

                        /* Increase the number of bad ratio, if the ratio
                           is bigger than the threshold.
                           The !<= below causes NaN error to be detected.
                           Note that (NaN > thresh) is always false. */
                        if (!(ratio <= thresh)) {
                          bad_ratios++;

                          if ((debug == 3) &&   /* print only when debug is on */
                              (count != old_count) &&   /* print if old vector is different
                                                           from the current one */
                              (d_count == find_max_ratio) &&
                              (p_count <= max_print) &&
                              (ratio > 0.5 * ratio_max)) {
                            old_count = count;

                            printf
                              ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
                               fname, m, n, ntests, thresh);

                            /* Print test info */
                            switch (prec) {
                            case blas_prec_single:
                              printf("single ");
                              break;
                            case blas_prec_double:
                              printf("double ");
                              break;
                            case blas_prec_indigenous:
                              printf("indigenous ");
                              break;
                            case blas_prec_extra:
                              printf("extra ");
                              break;
                            }
                            switch (norm) {
                            case -1:
                              printf("near_underflow ");
                              break;
                            case 0:
                              printf("near_one ");
                              break;
                            case 1:
                              printf("near_overflow ");
                              break;
                            }
                            switch (order_type) {
                            case blas_rowmajor:
                              printf("row_major ");
                              break;
                            case blas_colmajor:
                              printf("col_major ");
                              break;
                            }
                            switch (trans_type) {
                            case blas_no_trans:
                              printf("no_trans ");
                              break;
                            case blas_trans:
                              printf("trans ");
                              break;
                            case blas_conj_trans:
                              printf("conj_trans ");
                              break;
                            }

                            printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
                                   incy);

                            dge_print_matrix(A, m_i, n_i, lda, order_type,
                                             "A");

                            sprint_vector(head_x, n_i, incx_val, "head_x");
                            sprint_vector(tail_x, n_i, incx_val, "tail_x");
                            dprint_vector(y_gen, m_i, 1, "y_gen");
                            dprint_vector(y, m_i, incy_val, "y_final");

                            printf("      ");
                            printf("alpha = ");
                            printf("%24.16e", alpha);
                            printf("\n      ");
                            printf("beta = ");
                            printf("%24.16e", beta);
                            printf("\n");

                            /* true result (head, tail) and ratio per row */
                            for (j = 0, k = 0; j < m_i * incy_gen;
                                 j += incy_gen, k++) {
                              printf("      ");
                              printf("[%24.16e, %24.16e]", head_r_true[j],
                                     tail_r_true[j]);
                              printf(", ratio[%d]=%.4e\n", k, ratios[k]);
                            }

                            printf("      ratio=%.4e\n", ratio);
                            p_count++;
                          }

                          /* give up early; the summary is skipped but
                             buffers are released and outputs stored */
                          if (bad_ratios >= MAX_BAD_TESTS) {
                            printf("\ntoo many failures, exiting....");
                            printf("\nTesting and compilation");
                            printf(" are incomplete\n\n");
                            goto end;
                          }
                          if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
                            printf("\nFlagrant ratio error, exiting...");
                            printf("\nTesting and compilation");
                            printf(" are incomplete\n\n");
                            goto end;
                          }
                        }

                        /* statistics are gathered in the first pass only */
                        if (d_count == 0) {
                          if (ratio > ratio_max) {
                            ratio_max = ratio;
                          }

                          if (ratio != 0.0 && ratio < ratio_min) {
                            ratio_min = ratio;
                          }

                          tot_tests++;
                        }
                      }         /* incy */
                    }           /* incx */
                  }             /* lda */
                }               /* trans */
              }                 /* order */
            }                   /* tests */
          }                     /* norm */
        }                       /* prec */
      }                         /* beta */
    }                           /* alpha */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* tot_tests is 0 when ntests == 0 or every case was skipped by
       test_prob; report a fraction of 0 instead of dividing 0 by 0 */
    double bad_fraction = 0.0;

    if (tot_tests > 0) {
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);
    }

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
           fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_s_d_x(int m, int n, int ntests, int *seed,
                            double thresh, int debug, float test_prob,
                            double *min_ratio, int *num_bad_ratio,
                            int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_dgemv2_s_d_x (GEMV2 with a float
 * matrix A, a double head_x/tail_x pair and a double y) and reports the
 * error ratios computed by test_BLAS_ddot2_s_d.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *           Zero is accepted (nothing is run); negative is an error.
 *
 * seed      (input/output) int
 *           The seed for the random number generator used by xrand() and
 *           the test generator.
 *
 * thresh    (input) double
 *           A test counts as "bad" when its ratio is not <= thresh
 *           (this includes NaN ratios).  Since ratio is supposed to be
 *           O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, the whole sweep is run twice: the first pass
 *                       finds the maximum ratio, the second pass (with
 *                       the seed restored) prints full details of failing
 *                       tests whose ratio > 0.5 * max ratio.
 *           If debug=2, print the one-line summary.
 *           If debug=1, print the summary only if there are bad ratios.
 *           Otherwise,  print nothing (except the abort messages).
 *
 * test_prob (input) float
 *           A generated case is executed only if xrand(seed) < test_prob;
 *           otherwise it is skipped for the sake of speed.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen (1e308 if none was seen,
 *           0.0 if m == 0 or n == 0).
 *
 * num_bad_ratio (output) int
 *               The number of tests whose ratio is above the threshold,
 *               counted over the last pass executed.
 *
 * num_tests (output) int
 *           The number of tests performed (counted in the first pass).
 *
 * Return value
 * ============
 *
 * The maximum ratio found in the first pass; 0.0 if m == 0 or n == 0.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to the test generator
 *      beta loop   -- varying beta: 0, 1, or left to the test generator
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- the above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name, used in diagnostics */
  const char fname[] = "BLAS_dgemv2_s_d_x";

  /* limit on detailed failure dumps; the check below is
     p_count <= max_print, so up to max_print + 1 dumps are printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;                        /* iterate through the repeating tests */
  int j, k;                     /* multipurpose counters or variables */
  int iy;                       /* use to index y */
  int incx_val, incy_val,       /* for testing different inc values */
    incx, incy;
  int incy_gen;                 /* stride of y_gen/r_true; 1 in this real case */
  int d_count;                  /* index of the current debug pass */
  int find_max_ratio;           /* find_max_ratio = 1 only if debug = 3 */
  int p_count;                  /* number of detailed failure dumps printed */
  int tot_tests;                /* number of tests executed in the first pass */
  int norm;                     /* input values of near underflow/one/overflow */
  double ratio_max;             /* the current maximum ratio */
  double ratio_min;             /* the current minimum nonzero ratio */
  double *ratios;               /* per-row ratios of the current test */
  double ratio;                 /* max (or NaN) over ratios[] of the current test */
  int bad_ratios;               /* the number of ratios over the threshold */
  double eps_int;               /* internal epsilon for the selected precision */
  double un_int;                /* the internal underflow threshold */
  double alpha;
  double beta;
  float *A;
  double *head_x;
  double *tail_x;
  double *y;
  float *temp;                  /* holds one row of A while computing a ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they are copied into x and y with the stride under test */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;               /* input flag for BLAS_dgemv2_s_d_testgen */
  int beta_val;
  int beta_flag;                /* input flag for BLAS_dgemv2_s_d_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;                   /* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;               /* for saving the original seed */
  int count, old_count;         /* count: generated cases so far;
                                   old_count: the case last dumped */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0) {
    BLAS_error(fname, 0, 0, NULL);
  }

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3) {
    find_max_ratio = 1;
  }
  max_mn = MAX(m, n);

  /* trivial problem: nothing to test; outputs keep their initial values */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;

  /* get space for calculation; x and y get 2 * max_mn elements so that
     strides of +-2 fit */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, the sweep is executed twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;             /* counted afresh in every pass */

    /* replay exactly the same random cases in the printing pass */
    if ((debug == 3) && (d_count == find_max_ratio)) {
      *seed = saved_seed;
    }

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* alpha_flag = 1 passes a fixed alpha to testgen; with
         alpha_val == 2 the flag stays 0 and alpha is passed by address
         (presumably filled in by testgen -- confirm there) */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
        alpha = 0.0;
        alpha_flag = 1;
        break;
      case 1:
        alpha = 1.0;
        alpha_flag = 1;
        break;
      }

      /* varying beta, same scheme as alpha */
      for (beta_val = 0; beta_val < 3; beta_val++) {
        beta_flag = 0;
        switch (beta_val) {
        case 0:
          beta = 0.0;
          beta_flag = 1;
          break;
        case 1:
          beta = 1.0;
          beta_flag = 1;
          break;
        }

        /* varying extra precs: double is exercised twice, then extra */
        for (prec_val = 0; prec_val <= 2; prec_val++) {
          switch (prec_val) {
          case 0:
          case 1:
            eps_int = power(2, -BITS_D);
            un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
                         (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
            prec = blas_prec_double;
            break;
          case 2:
          default:
            eps_int = power(2, -BITS_E);
            un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
                         (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
            prec = blas_prec_extra;
            break;
          }

          /* values near underflow, 1, or overflow */
          for (norm = -1; norm <= 1; norm++) {

            /* number of tests */
            for (i = 0; i < ntests; i++) {

              /* row or col major */
              for (order_val = 0; order_val < 2; order_val++) {
                switch (order_val) {
                case 0:
                  order_type = blas_rowmajor;
                  break;
                case 1:
                default:
                  order_type = blas_colmajor;
                  break;
                }

                /* no_trans, trans, or conj_trans; m_i is the length of
                   y and n_i the length of x for the chosen op(A) */
                for (trans_val = 0; trans_val < 3; trans_val++) {
                  switch (trans_val) {
                  case 0:
                    trans_type = blas_no_trans;
                    m_i = m;
                    n_i = n;
                    break;
                  case 1:
                    trans_type = blas_trans;
                    m_i = n;
                    n_i = m;
                    break;
                  case 2:
                  default:
                    trans_type = blas_conj_trans;
                    m_i = n;
                    n_i = m;
                    break;
                  }

                  /* lda = m_i, m_i+1, or 2*m_i */
                  for (lda_val = 0; lda_val < 3; lda_val++) {
                    switch (lda_val) {
                    case 0:
                      lda = m_i;
                      break;
                    case 1:
                      lda = m_i + 1;
                      break;
                    case 2:
                    default:
                      lda = 2 * m_i;
                      break;
                    }

                    /* skip leading dimensions too small for the layout */
                    if ((order_type == blas_rowmajor && lda < n) ||
                        (order_type == blas_colmajor && lda < m)) {
                      continue;
                    }

                    /* For the sake of speed, we throw out this case at random */
                    if (xrand(seed) >= test_prob) {
                      continue;
                    }

                    /* in the trivial cases, no need to run testgen
                       (always true here because of the early return) */
                    if (m > 0 && n > 0) {
                      BLAS_dgemv2_s_d_testgen(norm, order_type, trans_type, m,
                                              n, &alpha, alpha_flag, A, lda,
                                              head_x_gen, tail_x_gen, &beta,
                                              beta_flag, y_gen, seed,
                                              head_r_true, tail_r_true);
                    }

                    count++;

                    /* varying incx */
                    for (incx_val = -2; incx_val <= 2; incx_val++) {
                      if (incx_val == 0) {
                        continue;
                      }

                      /* setting incx */
                      incx = incx_val;

                      /* spread the generated x over the stride under test */
                      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
                      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

                      /* varying incy */
                      for (incy_val = -2; incy_val <= 2; incy_val++) {
                        if (incy_val == 0) {
                          continue;
                        }

                        /* setting incy */
                        incy = incy_val;

                        /* y is overwritten by the routine, so reload it */
                        dcopy_vector(y_gen, m_i, 1, y, incy_val);

                        /* call BLAS_dgemv2_s_d_x with the FPU fix
                           switched off around the routine under test */
                        FPU_FIX_STOP;
                        BLAS_dgemv2_s_d_x(order_type, trans_type, m, n, alpha,
                                          A, lda, head_x, tail_x, incx_val,
                                          beta, y, incy_val, prec);
                        FPU_FIX_START;

                        /* set y starting index: a negative stride
                           starts from the far end of the buffer */
                        iy = 0;
                        if (incy < 0) {
                          iy = -(m_i - 1) * incy;
                        }

                        /* computing the ratio, one dot product per
                           element of y */
                        if (m > 0 && n > 0) {
                          for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
                            /* copy row j of A to temp */
                            sge_copy_row(order_type, trans_type, m_i, n_i, A,
                                         lda, temp, j);

                            test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha,
                                                beta, y_gen[k], y[iy],
                                                head_r_true[k],
                                                tail_r_true[k], temp, 1,
                                                head_x, tail_x, incx_val,
                                                eps_int, un_int, &ratios[j]);

                            /* take the max ratio */
                            if (j == 0) {
                              ratio = ratios[0];
                              /* The !<= below causes NaN error to be detected.
                                 Note that (NaN > thresh) is always false. */
                            } else if (!(ratios[j] <= ratio)) {
                              ratio = ratios[j];
                            }
                            iy += incy;
                          }
                        }

                        /* Increase the number of bad ratio, if the ratio
                           is bigger than the threshold.
                           The !<= below causes NaN error to be detected.
                           Note that (NaN > thresh) is always false. */
                        if (!(ratio <= thresh)) {
                          bad_ratios++;

                          if ((debug == 3) &&   /* print only when debug is on */
                              (count != old_count) &&   /* print if old vector is different
                                                           from the current one */
                              (d_count == find_max_ratio) &&
                              (p_count <= max_print) &&
                              (ratio > 0.5 * ratio_max)) {
                            old_count = count;

                            printf
                              ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
                               fname, m, n, ntests, thresh);

                            /* Print test info */
                            switch (prec) {
                            case blas_prec_single:
                              printf("single ");
                              break;
                            case blas_prec_double:
                              printf("double ");
                              break;
                            case blas_prec_indigenous:
                              printf("indigenous ");
                              break;
                            case blas_prec_extra:
                              printf("extra ");
                              break;
                            }
                            switch (norm) {
                            case -1:
                              printf("near_underflow ");
                              break;
                            case 0:
                              printf("near_one ");
                              break;
                            case 1:
                              printf("near_overflow ");
                              break;
                            }
                            switch (order_type) {
                            case blas_rowmajor:
                              printf("row_major ");
                              break;
                            case blas_colmajor:
                              printf("col_major ");
                              break;
                            }
                            switch (trans_type) {
                            case blas_no_trans:
                              printf("no_trans ");
                              break;
                            case blas_trans:
                              printf("trans ");
                              break;
                            case blas_conj_trans:
                              printf("conj_trans ");
                              break;
                            }

                            printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
                                   incy);

                            sge_print_matrix(A, m_i, n_i, lda, order_type,
                                             "A");

                            dprint_vector(head_x, n_i, incx_val, "head_x");
                            dprint_vector(tail_x, n_i, incx_val, "tail_x");
                            dprint_vector(y_gen, m_i, 1, "y_gen");
                            dprint_vector(y, m_i, incy_val, "y_final");

                            printf("      ");
                            printf("alpha = ");
                            printf("%24.16e", alpha);
                            printf("\n      ");
                            printf("beta = ");
                            printf("%24.16e", beta);
                            printf("\n");

                            /* true result (head, tail) and ratio per row */
                            for (j = 0, k = 0; j < m_i * incy_gen;
                                 j += incy_gen, k++) {
                              printf("      ");
                              printf("[%24.16e, %24.16e]", head_r_true[j],
                                     tail_r_true[j]);
                              printf(", ratio[%d]=%.4e\n", k, ratios[k]);
                            }

                            printf("      ratio=%.4e\n", ratio);
                            p_count++;
                          }

                          /* give up early; the summary is skipped but
                             buffers are released and outputs stored */
                          if (bad_ratios >= MAX_BAD_TESTS) {
                            printf("\ntoo many failures, exiting....");
                            printf("\nTesting and compilation");
                            printf(" are incomplete\n\n");
                            goto end;
                          }
                          if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
                            printf("\nFlagrant ratio error, exiting...");
                            printf("\nTesting and compilation");
                            printf(" are incomplete\n\n");
                            goto end;
                          }
                        }

                        /* statistics are gathered in the first pass only */
                        if (d_count == 0) {
                          if (ratio > ratio_max) {
                            ratio_max = ratio;
                          }

                          if (ratio != 0.0 && ratio < ratio_min) {
                            ratio_min = ratio;
                          }

                          tot_tests++;
                        }
                      }         /* incy */
                    }           /* incx */
                  }             /* lda */
                }               /* trans */
              }                 /* order */
            }                   /* tests */
          }                     /* norm */
        }                       /* prec */
      }                         /* beta */
    }                           /* alpha */
  }                             /* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* tot_tests is 0 when ntests == 0 or every case was skipped by
       test_prob; report a fraction of 0 instead of dividing 0 by 0 */
    double bad_fraction = 0.0;

    if (tot_tests > 0) {
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);
    }

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
           fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_dgemv2_s_s_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on GEMV2, specifically on BLAS_dgemv2_s_s_x.
 * Each test case is produced by BLAS_dgemv2_s_s_testgen(), handed to
 * BLAS_dgemv2_s_s_x(), and every element of the resulting y is checked
 * with test_BLAS_ddot2_s_s() against the reference values (head_r_true,
 * tail_r_true) returned by the generator.  The largest per-element ratio
 * of a test is that test's ratio.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows.  Negative values are reported through
 *           BLAS_error().
 *
 * n         (input) int
 *           The number of columns.  Negative values are reported through
 *           BLAS_error().
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is advanced by
 *           xrand() and by the test generator.  When debug == 3 it is
 *           reset to its entry value before the second pass so that the
 *           second pass sees the same sequence as the first.
 *
 * thresh    (input) double
 *           A test counts as bad when its ratio is not <= thresh (this
 *           form also catches NaN).  (Since ratio is supposed to be O(1),
 *           we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run all tests twice: the first pass records the
 *                       max ratio, the second pass prints complete info
 *                       for failing tests whose ratio > 0.5 * max ratio
 *                       (a limited number of reports, see max_print).
 *           If debug=2, print a one-block summary at the end.
 *           If debug=1, print that summary only if the number of bad
 *                       ratios > 0.
 *           Otherwise,  print nothing except the abort messages below.
 *
 * test_prob (input) float
 *           A generated configuration is skipped when
 *           xrand(seed) >= test_prob, i.e. it is run only when the random
 *           draw is below test_prob.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           Left at 1e308 if no nonzero ratio was recorded; set to 0.0
 *           on the early return for m == 0 or n == 0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio not <= thresh).
 *               The counter is reset at the start of every pass of the
 *               debug loop, so it reflects the last pass executed.
 *
 * num_tests (output) int
 *           The number of tests performed, counted during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass (0.0 if none).
 * Returns 0.0 immediately, without running any test, when m == 0 or
 * n == 0.  No code path in this function returns -1.
 *
 * Early termination
 * =================
 *
 * Regardless of debug, the run is abandoned (message printed, buffers
 * freed, outputs stored, summary skipped) as soon as the number of bad
 * ratios reaches MAX_BAD_TESTS or a ratio is not
 * <= TOTAL_FAILURE_THRESHOLD.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second (last) pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- for any other debug value the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to the generator
 *      beta loop   -- varying beta: 0, 1, or left to the generator
 *        prec loop   -- varying internal prec: double, double (again),
 *                      -- or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                                  -- (skipped when too small)
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_dgemv2_s_s_x";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests performed (first pass only) */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum (nonzero) ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected; set in the prec loop */
  double un_int;		/* the internal underflow threshold */
  double alpha;
  double beta;
  float *A;
  float *head_x;
  float *tail_x;
  double *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;
  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: testgen rounds so far;
				   old_count: value of count at the last
				   debug report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty problem; outputs keep the zero values
     stored above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;


  /* get space for calculation */
  /* head_x, tail_x and y hold max_mn * 2 elements: room for the largest
     stride magnitude (2) used by the incx/incy loops below */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  /* the *_gen vectors, temp, r_true and ratios are used with unit stride */
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  /* NOTE(review): the element count below is evaluated in int arithmetic
     before being multiplied by sizeof(float); it can overflow for very
     large m and n */
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max).
     For any other debug value the iteration is executed once. */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* restart the failure count for this pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      /* alpha_val == 2 has no case: alpha_flag stays 0 and alpha is not
         assigned here; presumably the generator, which receives &alpha,
         then chooses the value -- TODO confirm against testgen */
      switch (alpha_val) {
      case 0:
	alpha = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha = 1.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta (same scheme as alpha) */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta = 1.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; note that prec_val 0 and 1 select the same
	   settings (blas_prec_double), only prec_val 2 selects extra */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x as used
		   by the copies and checks below */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i; candidates smaller than the
		     leading dimension required by the storage order
		     (n for rowmajor, m for colmajor) are skipped */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random
		       (kept only when the draw is below test_prob) */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_dgemv2_s_s_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* one more generated problem; used to limit debug
		       output to one report per problem */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;


		      /* spread the generated x (unit stride) into the
		         strided work vectors */
		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			incy = incy_val;


			/* y is overwritten by the routine under test, so it
			   is refreshed from y_gen for every incy */
			dcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_dgemv2_s_s_x */
			FPU_FIX_STOP;
			BLAS_dgemv2_s_s_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index: for a negative stride start
			   at the far end so that iy += incy walks back to 0 */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio: element j of y is checked as
			   a dot product of row j of A with x */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha,
						beta, y_gen[k], y[iy],
						head_r_true[k],
						tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* NOTE(review): p_count <= max_print lets
			     max_print + 1 reports through */
			  if ((debug == 3) &&	/* detailed output only when debug == 3 */
			      (count != old_count) &&	/* at most one report per
							   generated problem */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    sge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    sprint_vector(head_x, n_i, incx_val, "head_x");
			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
			    dprint_vector(y_gen, m_i, 1, "y_gen");
			    dprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("%24.16e", alpha);
			    printf("\n      ");
			    printf("beta = ");
			    printf("%24.16e", beta);
			    printf("\n");
			    /* j indexes r_true (step incy_gen), k indexes
			       ratios (one per element of y) */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf("[%24.16e, %24.16e]", head_r_true[j],
				     tail_r_true[j]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up once too many tests have failed */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* give up on a grossly wrong (or NaN) ratio */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered on the first pass only, so
			   the debug == 3 replay does not count tests twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary: always for debug == 2, only on failures for debug == 1.
     NOTE(review): tot_tests can be 0 if every case was skipped at random,
     in which case the printed bad/total quotient is 0/0 */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached by the goto's above, which thereby skip
     the summary */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_z_c_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on GEMV2, specifically on BLAS_zgemv2_z_c_x.
 * Each test case is produced by BLAS_zgemv2_z_c_testgen(), handed to
 * BLAS_zgemv2_z_c_x(), and every entry of the resulting y is checked
 * with test_BLAS_zdot2_z_c() against the reference values (head_r_true,
 * tail_r_true) returned by the generator.  The largest per-entry ratio
 * of a test is that test's ratio.
 *
 * This is the complex variant: every vector/matrix entry occupies two
 * consecutive array elements (incy_gen == 2), and alpha and beta are
 * two-element arrays.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows.  Negative values are reported through
 *           BLAS_error().
 *
 * n         (input) int
 *           The number of columns.  Negative values are reported through
 *           BLAS_error().
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is advanced by
 *           xrand() and by the test generator.  When debug == 3 it is
 *           reset to its entry value before the second pass so that the
 *           second pass sees the same sequence as the first.
 *
 * thresh    (input) double
 *           A test counts as bad when its ratio is not <= thresh (this
 *           form also catches NaN).  (Since ratio is supposed to be O(1),
 *           we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run all tests twice: the first pass records the
 *                       max ratio, the second pass prints complete info
 *                       for failing tests whose ratio > 0.5 * max ratio
 *                       (a limited number of reports, see max_print).
 *           If debug=2, print a one-block summary at the end.
 *           If debug=1, print that summary only if the number of bad
 *                       ratios > 0.
 *           Otherwise,  print nothing except the abort messages below.
 *
 * test_prob (input) float
 *           A generated configuration is skipped when
 *           xrand(seed) >= test_prob, i.e. it is run only when the random
 *           draw is below test_prob.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           Left at 1e308 if no nonzero ratio was recorded; set to 0.0
 *           on the early return for m == 0 or n == 0.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio not <= thresh).
 *               The counter is reset at the start of every pass of the
 *               debug loop, so it reflects the last pass executed.
 *
 * num_tests (output) int
 *           The number of tests performed, counted during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass (0.0 if none).
 * Returns 0.0 immediately, without running any test, when m == 0 or
 * n == 0.  No code path in this function returns -1.
 *
 * Early termination
 * =================
 *
 * Regardless of debug, the run is abandoned (message printed, buffers
 * freed, outputs stored, summary skipped) as soon as the number of bad
 * ratios reaches MAX_BAD_TESTS or a ratio is not
 * <= TOTAL_FAILURE_THRESHOLD.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second (last) pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- for any other debug value the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or left to the generator
 *      beta loop   -- varying beta: 0, 1, or left to the generator
 *        prec loop   -- varying internal prec: double, double (again),
 *                      -- or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                                  -- (skipped when too small)
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_z_c_x";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* incx, incy: the *_val strides doubled, i.e.
				   measured in array elements */
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests performed (first pass only) */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum (nonzero) ratio */
  double *ratios;		/* per-entry ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected; set in the prec loop */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];		/* printed as a pair; presumably (real, imag) */
  double beta[2];		/* printed as a pair; presumably (real, imag) */
  double *A;
  float *head_x;
  float *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: testgen rounds so far;
				   old_count: value of count at the last
				   debug report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty problem; outputs keep the zero values
     stored above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* two array elements per entry of y_gen / r_true */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  /* every size carries a trailing "* 2" for the two elements per entry;
     head_x, tail_x and y additionally hold max_mn * 2 entries: room for
     the largest stride magnitude (2) used by the incx/incy loops below */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  /* the *_gen vectors, temp and r_true are used with unit stride */
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  /* one ratio per entry of y, hence no "* 2" here */
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  /* NOTE(review): the element count below is evaluated in int arithmetic
     before being multiplied by sizeof(double); it can overflow for very
     large m and n */
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
			   2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max).
     For any other debug value the iteration is executed once. */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* restart the failure count for this pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      /* alpha_val == 2 has no case: alpha_flag stays 0 and alpha is not
         assigned here; presumably the generator, which receives &alpha,
         then chooses the value -- TODO confirm against testgen */
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta (same scheme as alpha) */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; note that prec_val 0 and 1 select the same
	   settings (blas_prec_double), only prec_val 2 selects extra */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x as used
		   by the copies and checks below */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i; candidates smaller than the
		     leading dimension required by the storage order
		     (n for rowmajor, m for colmajor) are skipped */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random
		       (kept only when the draw is below test_prob) */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_z_c_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* one more generated problem; used to limit debug
		       output to one report per problem */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx (doubled copy is only printed; the
		         calls below take incx_val) */
		      incx = incx_val;
		      incx *= 2;

		      /* spread the generated x (unit stride) into the
		         strided work vectors */
		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy (doubled: step between consecutive
			   entries of y measured in doubles, used for iy) */
			incy = incy_val;
			incy *= 2;

			/* y is overwritten by the routine under test, so it
			   is refreshed from y_gen for every incy */
			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_z_c_x */
			FPU_FIX_STOP;
			BLAS_zgemv2_z_c_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index: for a negative stride start
			   at the far end so that iy += incy walks back to 0 */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio: entry j of y is checked as
			   a dot product of row j of A with x; k steps
			   through y_gen / r_true two elements at a time */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    zge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* NOTE(review): p_count <= max_print lets
			     max_print + 1 reports through */
			  if ((debug == 3) &&	/* detailed output only when debug == 3 */
			      (count != old_count) &&	/* at most one report per
							   generated problem */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    zge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    cprint_vector(head_x, n_i, incx_val, "head_x");
			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* j indexes r_true (two elements per entry:
			       j and j + 1), k indexes ratios (one per entry
			       of y) */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up once too many tests have failed */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* give up on a grossly wrong (or NaN) ratio */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered on the first pass only, so
			   the debug == 3 replay does not count tests twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary: always for debug == 2, only on failures for debug == 1.
     NOTE(review): tot_tests can be 0 if every case was skipped at random,
     in which case the printed bad/total quotient is 0/0 */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached by the goto's above, which thereby skip
     the summary */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_c_z_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on GEMV2 (routine BLAS_zgemv2_c_z_x), sweeping
 * alpha, beta, internal precision, norm, storage order, transpose
 * setting, lda, incx and incy.  For every combination the routine is
 * called on generated data and each entry of the result is checked
 * against the double-double reference produced by the test generator.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is passed to
 *           xrand() and to BLAS_zgemv2_c_z_testgen().  When debug=3 the
 *           original value is restored before the second pass so that
 *           both passes see the same sequence.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh;
 *           a NaN ratio is therefore counted as bad as well.  (Since
 *           ratio is supposed to be O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass
 *                       computes the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       above 0.5 * max ratio
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           For any other value nothing is printed apart from the
 *           early-exit messages.
 *
 * test_prob (input) float
 *           A generated case is run only if the random number drawn for
 *           it is below this value; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio recorded during the first pass.
 *           Set to 0.0 when m or n is zero; left at 1e308 when no
 *           nonzero ratio was recorded.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the
 *               threshold, or NaN) in the last pass executed.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass, or 0.0 when m or n
 * is zero.  No path in this function returns -1.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the last(second) pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- for any other debug value the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* routine name used in the printed reports */
  const char fname[] = "BLAS_zgemv2_c_z_x";

  /* cap on the number of detailed failure reports (see the note where
     it is compared against p_count) */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* the same increments doubled (complex data) */
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected; set per prec_val */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  float *A;
  double *head_x;
  double *tail_x;
  double *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far;
				   old_count: count at the last detailed print */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; outputs keep the values set
     above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  /* head_x, tail_x and y get twice the element count of their *_gen
     counterparts because they are filled with strides of up to 2 */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max).
     For any other debug value the iteration is executed once. */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    /* alpha_val 2 sets neither alpha nor alpha_flag (flag stays 0);
       presumably testgen then picks alpha itself -- confirm in testgen */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      /* same scheme as alpha: beta_val 2 leaves beta_flag at 0 */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs */
	/* prec_val 0 and 1 select identical double settings; only
	   prec_val 2 selects extra precision */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans */
		/* m_i is the length of y and n_i the length of x for
		   the chosen transpose setting */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i + 1, or 2 * m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions that are too small for
		       the m-by-n matrix in the chosen storage order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_c_z_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* one more generated case; compared with old_count
		       so that a case is reported in detail at most once */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      /* incx is only used in the printed report below */
		      incx = incx_val;
		      incx *= 2;

		      zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			/* incy is the stride in doubles used to walk y */
			incy = incy_val;
			incy *= 2;

			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_c_z_x */
			/* FPU_FIX is stopped around the call and
			   restarted afterwards; presumably the routine
			   under test manages the FPU mode itself --
			   confirm against the macro definitions */
			FPU_FIX_STOP;
			BLAS_zgemv2_c_z_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			/* j counts rows, k indexes the unit-stride
			   complex arrays y_gen and r_true */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* NOTE(review): p_count starts at 0 and the test
			     is <=, so up to max_print + 1 reports can be
			     printed */
			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    cge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    zprint_vector(head_x, n_i, incx_val, "head_x");
			    zprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* one line per row: j steps through the
			       complex r_true entries, k through ratios */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up early, but still release the buffers
			     and report the counts gathered so far */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* written as !(<=) so that a NaN ratio also
			     takes this exit */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only,
			   so the second (debug=3) pass does not count
			   the same tests twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary: always for debug=2, only on failure for debug=1.
     NOTE(review): tot_tests can be 0 here when every case was skipped,
     in which case the bad/total quotient below is 0/0 */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached by the early "goto end" aborts above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_c_c_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on GEMV2 (routine BLAS_zgemv2_c_c_x), sweeping
 * alpha, beta, internal precision, norm, storage order, transpose
 * setting, lda, incx and incy.  For every combination the routine is
 * called on generated data and each entry of the result is checked
 * against the double-double reference produced by the test generator.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is passed to
 *           xrand() and to BLAS_zgemv2_c_c_testgen().  When debug=3 the
 *           original value is restored before the second pass so that
 *           both passes see the same sequence.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh;
 *           a NaN ratio is therefore counted as bad as well.  (Since
 *           ratio is supposed to be O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice: the first pass
 *                       computes the max ratio, the second pass prints
 *                       complete info for failing tests whose ratio is
 *                       above 0.5 * max ratio
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           For any other value nothing is printed apart from the
 *           early-exit messages.
 *
 * test_prob (input) float
 *           A generated case is run only if the random number drawn for
 *           it is below this value; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio recorded during the first pass.
 *           Set to 0.0 when m or n is zero; left at 1e308 when no
 *           nonzero ratio was recorded.
 *
 * num_bad_ratio (output) int
 *               The number of tests that failed (ratio above the
 *               threshold, or NaN) in the last pass executed.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass, or 0.0 when m or n
 * is zero.  No path in this function returns -1.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the last(second) pass outputs debugging information,
 *              -- if the test fails and its ratio > 0.5 * max ratio.
 *              -- for any other debug value the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* routine name used in the printed reports */
  const char fname[] = "BLAS_zgemv2_c_c_x";

  /* cap on the number of detailed failure reports (see the note where
     it is compared against p_count) */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* the same increments doubled (complex data) */
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected; set per prec_val */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  double *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far;
				   old_count: count at the last detailed print */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; outputs keep the values set
     above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  /* head_x, tail_x and y get twice the element count of their *_gen
     counterparts because they are filled with strides of up to 2 */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max).
     For any other debug value the iteration is executed once. */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* counted afresh in every pass */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    /* alpha_val 2 sets neither alpha nor alpha_flag (flag stays 0);
       presumably testgen then picks alpha itself -- confirm in testgen */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      /* same scheme as alpha: beta_val 2 leaves beta_flag at 0 */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs */
	/* prec_val 0 and 1 select identical double settings; only
	   prec_val 2 selects extra precision */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans */
		/* m_i is the length of y and n_i the length of x for
		   the chosen transpose setting */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i + 1, or 2 * m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions that are too small for
		       the m-by-n matrix in the chosen storage order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_c_c_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* one more generated case; compared with old_count
		       so that a case is reported in detail at most once */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      /* incx is only used in the printed report below */
		      incx = incx_val;
		      incx *= 2;

		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy */
			/* incy is the stride in doubles used to walk y */
			incy = incy_val;
			incy *= 2;

			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_c_c_x */
			/* FPU_FIX is stopped around the call and
			   restarted afterwards; presumably the routine
			   under test manages the FPU mode itself --
			   confirm against the macro definitions */
			FPU_FIX_STOP;
			BLAS_zgemv2_c_c_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			/* j counts rows, k indexes the unit-stride
			   complex arrays y_gen and r_true */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* NOTE(review): p_count starts at 0 and the test
			     is <=, so up to max_print + 1 reports can be
			     printed */
			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    cge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    cprint_vector(head_x, n_i, incx_val, "head_x");
			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* one line per row: j steps through the
			       complex r_true entries, k through ratios */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up early, but still release the buffers
			     and report the counts gathered so far */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* written as !(<=) so that a NaN ratio also
			     takes this exit */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only,
			   so the second (debug=3) pass does not count
			   the same tests twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary: always for debug=2, only on failure for debug=1.
     NOTE(review): tot_tests can be 0 here when every case was skipped,
     in which case the bad/total quotient below is 0/0 */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached by the early "goto end" aborts above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_c_s_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cgemv2_c_s_x.  The routine is called for
 * a sweep of alpha, beta, internal precision, norm, storage order,
 * transpose mode, lda, incx and incy.  After each call, every element of
 * the computed y is handed to test_BLAS_cdot2_c_s together with the
 * reference values (head_r_true, tail_r_true) filled in by
 * BLAS_cgemv2_c_s_testgen; the largest per-element ratio becomes the ratio
 * of that test.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of repetitions for each (alpha, beta, prec, norm)
 *           combination.
 *
 * seed      (input/output) int
 *           The seed passed to xrand() and to the test generator.  When
 *           debug == 3 it is reset to its entry value before the second
 *           pass so that the same cases are regenerated.
 *
 * thresh    (input) double
 *           A test is counted as bad when !(ratio <= thresh); written this
 *           way so that a NaN ratio is also counted as bad.
 *
 * debug     (input) int
 *           If debug=3, run the sweep twice: the first pass records the
 *                       max ratio, the second pass prints full details of
 *                       failing tests whose ratio > 0.5 * max ratio.
 *                       No summary line is printed.
 *           If debug=2, print the summary line after the sweep.
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0.
 *           Otherwise,  print nothing except the abort messages.
 *
 * test_prob (input) float
 *           A generated case is run only if xrand(seed) < test_prob;
 *           otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           Set to 0.0 if m == 0 or n == 0; left at the initial 1e308 if
 *           no nonzero ratio was recorded.
 *
 * num_bad_ratio (output) int
 *               The number of bad tests counted in the last pass that was
 *               started (the counter is reset at the top of each pass).
 *
 * num_tests (output) int
 *           The number of tests counted during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass, or 0.0 if m == 0 or
 * n == 0.  No path in this function returns -1.
 *
 * Early termination
 * =================
 *
 * The sweep stops (and the summary line is skipped) as soon as the number
 * of bad ratios reaches MAX_BAD_TESTS or a ratio fails
 * ratio <= TOTAL_FAILURE_THRESHOLD.  Buffers are still released and the
 * output arguments are still written.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: single, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_c_s_x";

  /* Limit on detailed failure reports.
     NOTE(review): the guard below is p_count <= max_print, so up to
     max_print + 1 reports can be printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the max of ratios[] for the current test */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* internal epsilon: 2^-BITS_S, 2^-BITS_D or 2^-BITS_E */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far (one per case that
				   passes the test_prob filter);
				   old_count: value of count at the last printed
				   failure report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* Nothing to test for an empty matrix.  This return happens before
     FPU_FIX_START and before any buffer is allocated, so there is nothing
     to undo; the outputs keep the values assigned above. */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y and the reference result are complex: two entries per element */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  /* head_x, tail_x and y hold 2 * max_mn elements because |inc| goes up
     to 2; y, y_gen, temp and A carry an extra factor of 2 for complex. */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
			  2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max).
     For any other debug value the sweep runs once. */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* Rewind the seed so that the second pass regenerates exactly the
       cases of the first pass. */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha: 0, 1, or (alpha_flag == 0) left for testgen, which
       receives &alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta: same scheme as alpha */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; eps_int and un_int are the error-bound
	   parameters later passed to test_BLAS_cdot2_c_s */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_S);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	    prec = blas_prec_single;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i + 1, or 2 * m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions too small for the chosen
		       storage order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random:
		       it is kept only when xrand(seed) < test_prob */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen
		       (m > 0 && n > 0 always holds here because of the early
		       return above) */
		    if (m > 0 && n > 0)
		      BLAS_cgemv2_c_s_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* one more generated case; compared with old_count below
		       so each case is reported at most once */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx (x is real here, so no doubling) */
		      incx = incx_val;


		      /* copy the unit-stride generated x into head_x/tail_x
		         with stride incx_val */
		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy: doubled because it is used below to
			   step through the float array y, two floats per
			   complex element */
			incy = incy_val;
			incy *= 2;

			/* fresh copy of the generated y for every call */
			ccopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_cgemv2_c_s_x; the FPU_FIX macros are
			   switched off around the routine under test */
			FPU_FIX_STOP;
			BLAS_cgemv2_c_s_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index: for a negative stride the
			   walk starts at the far end of the array */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio: j indexes elements of y,
			   k indexes the unit-stride complex arrays y_gen and
			   head/tail_r_true */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* Detailed report: second pass of debug == 3 only,
			     once per generated case, and only for ratios
			     above half of the first-pass maximum. */
			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    /* NOTE(review): incy printed here is the doubled
			       float stride (incy_val * 2), not the incy_val
			       passed to the routine; incx equals incx_val. */
			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    cge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    sprint_vector(head_x, n_i, incx_val, "head_x");
			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
			    cprint_vector(y_gen, m_i, 1, "y_gen");
			    cprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			    printf("\n");
			    /* reference result, one complex element per line:
			       ([head_re tail_re], [head_im tail_im]) followed
			       by that element's ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up once too many tests have failed */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* give up on a grossly wrong (or NaN) ratio */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only, so
			   the debug == 3 replay does not count tests twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* Summary line: always for debug == 2, only on failure for debug == 1.
     It is skipped when the sweep aborts through "goto end".
     NOTE(review): tot_tests can be 0 here (e.g. ntests == 0, or every case
     skipped by the test_prob filter), in which case bad/total is 0/0. */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached from the two abort paths above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  /* ratio_min is still 1e308 if no nonzero ratio was recorded */
  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_s_c_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cgemv2_s_c_x.  The routine is called for
 * a sweep of alpha, beta, internal precision, norm, storage order,
 * transpose mode, lda, incx and incy.  After each call, every element of
 * the computed y is handed to test_BLAS_cdot2_s_c together with the
 * reference values (head_r_true, tail_r_true) filled in by
 * BLAS_cgemv2_s_c_testgen; the largest per-element ratio becomes the ratio
 * of that test.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of repetitions for each (alpha, beta, prec, norm)
 *           combination.
 *
 * seed      (input/output) int
 *           The seed passed to xrand() and to the test generator.  When
 *           debug == 3 it is reset to its entry value before the second
 *           pass so that the same cases are regenerated.
 *
 * thresh    (input) double
 *           A test is counted as bad when !(ratio <= thresh); written this
 *           way so that a NaN ratio is also counted as bad.
 *
 * debug     (input) int
 *           If debug=3, run the sweep twice: the first pass records the
 *                       max ratio, the second pass prints full details of
 *                       failing tests whose ratio > 0.5 * max ratio.
 *                       No summary line is printed.
 *           If debug=2, print the summary line after the sweep.
 *           If debug=1, print the summary line only if the number of bad
 *                       ratios > 0.
 *           Otherwise,  print nothing except the abort messages.
 *
 * test_prob (input) float
 *           A generated case is run only if xrand(seed) < test_prob;
 *           otherwise it is skipped.
 *
 * min_ratio (output) double
 *           The smallest nonzero ratio recorded during the first pass.
 *           Set to 0.0 if m == 0 or n == 0; left at the initial 1e308 if
 *           no nonzero ratio was recorded.
 *
 * num_bad_ratio (output) int
 *               The number of bad tests counted in the last pass that was
 *               started (the counter is reset at the top of each pass).
 *
 * num_tests (output) int
 *           The number of tests counted during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio recorded during the first pass, or 0.0 if m == 0 or
 * n == 0.  No path in this function returns -1.
 *
 * Early termination
 * =================
 *
 * The sweep stops (and the summary line is skipped) as soon as the number
 * of bad ratios reaches MAX_BAD_TESTS or a ratio fails
 * ratio <= TOTAL_FAILURE_THRESHOLD.  Buffers are still released and the
 * output arguments are still written.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first pass computes the max ratio
 *              -- and the second pass outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: single, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_s_c_x";

  /* Limit on detailed failure reports.
     NOTE(review): the guard below is p_count <= max_print, so up to
     max_print + 1 reports can be printed. */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* number of tests counted in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum nonzero ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the max of ratios[] for the current test */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* internal epsilon: 2^-BITS_S, 2^-BITS_D or 2^-BITS_E */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far (one per case that
				   passes the test_prob filter);
				   old_count: value of count at the last printed
				   failure report */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* Nothing to test for an empty matrix.  This return happens before
     FPU_FIX_START and before any buffer is allocated, so there is nothing
     to undo; the outputs keep the values assigned above. */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y and the reference result are complex: two entries per element */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  /* head_x, tail_x and y hold 2 * max_mn elements because |inc| goes up
     to 2; the x and y buffers carry an extra factor of 2 for complex,
     while A and temp are real. */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max).
     For any other debug value the sweep runs once. */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* Rewind the seed so that the second pass regenerates exactly the
       cases of the first pass. */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha: 0, 1, or (alpha_flag == 0) left for testgen, which
       receives &alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta: same scheme as alpha */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; eps_int and un_int are the error-bound
	   parameters later passed to test_BLAS_cdot2_s_c */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_S);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	    prec = blas_prec_single;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i + 1, or 2 * m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions too small for the chosen
		       storage order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random:
		       it is kept only when xrand(seed) < test_prob */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen
		       (m > 0 && n > 0 always holds here because of the early
		       return above) */
		    if (m > 0 && n > 0)
		      BLAS_cgemv2_s_c_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* one more generated case; compared with old_count below
		       so each case is reported at most once */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx: doubled for the complex x; within this
		         function the doubled value is only printed */
		      incx = incx_val;
		      incx *= 2;

		      /* copy the unit-stride generated x into head_x/tail_x
		         with stride incx_val */
		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy: doubled because it is used below to
			   step through the float array y, two floats per
			   complex element */
			incy = incy_val;
			incy *= 2;

			/* fresh copy of the generated y for every call */
			ccopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_cgemv2_s_c_x; the FPU_FIX macros are
			   switched off around the routine under test */
			FPU_FIX_STOP;
			BLAS_cgemv2_s_c_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index: for a negative stride the
			   walk starts at the far end of the array */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio: j indexes elements of y,
			   k indexes the unit-stride complex arrays y_gen and
			   head/tail_r_true */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* Detailed report: second pass of debug == 3 only,
			     once per generated case, and only for ratios
			     above half of the first-pass maximum. */
			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    /* NOTE(review): incx and incy printed here are the
			       doubled float strides (inc*_val * 2), not the
			       inc*_val values passed to the routine. */
			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    sge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    cprint_vector(head_x, n_i, incx_val, "head_x");
			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
			    cprint_vector(y_gen, m_i, 1, "y_gen");
			    cprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			    printf("\n");
			    /* reference result, one complex element per line:
			       ([head_re tail_re], [head_im tail_im]) followed
			       by that element's ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up once too many tests have failed */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* give up on a grossly wrong (or NaN) ratio */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered in the first pass only, so
			   the debug == 3 replay does not count tests twice */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* Summary line: always for debug == 2, only on failure for debug == 1.
     It is skipped when the sweep aborts through "goto end".
     NOTE(review): tot_tests can be 0 here (e.g. ntests == 0, or every case
     skipped by the test_prob filter), in which case bad/total is 0/0. */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached from the two abort paths above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  /* ratio_min is still 1e308 if no nonzero ratio was recorded */
  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_cgemv2_s_s_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_cgemv2_s_s_x.  For every combination of
 * the attributes listed under "Code structure", test data are produced by
 * BLAS_cgemv2_s_s_testgen, BLAS_cgemv2_s_s_x is called, and every element
 * of the computed y is checked with test_BLAS_cdot2_s_s against the
 * reference result (head_r_true, tail_r_true).  The largest per-element
 * ratio of a call is the ratio of that test.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is passed to
 *           xrand() and to testgen, and is reset to its initial value
 *           before the second pass when debug=3.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches a NaN ratio).  (Since ratio is supposed
 *           to be O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice and, on the second
 *                       pass, print complete info for failing tests
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           If debug=0, print nothing unless the sweep is aborted early
 *
 * test_prob (input) float
 *           A case is run only if the generated random number is less
 *           than test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass
 *           (left at 1e308 if there was none).
 *
 * num_bad_ratio (output) int
 *               The number of tests whose ratio is not <= thresh,
 *               counted over the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen during the first pass.
 * If m == 0 or n == 0 the function returns 0.0 at once, with
 * *min_ratio = 0.0, *num_bad_ratio = 0 and *num_tests = 0.
 * If the number of bad ratios reaches MAX_BAD_TESTS, or a ratio is not
 * <= TOTAL_FAILURE_THRESHOLD, a message is printed and the sweep stops
 * early; the values gathered so far are still returned.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or not fixed here (alpha_flag = 0)
 *      beta loop   -- varying beta: 0, 1, or not fixed here (beta_flag = 0)
 *        prec loop   -- varying internal prec: single, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                                  -- (m_i is m for no_trans, n otherwise)
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_cgemv2_s_s_x";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon for the precision under test */
  double un_int;		/* the internal underflow threshold */
  float alpha[2];
  float beta[2];
  float *A;
  float *head_x;
  float *tail_x;
  float *y;
  float *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  float *head_x_gen;
  float *tail_x_gen;
  float *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_cgemv2_s_s_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_cgemv2_s_s_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: testgen data sets generated so far;
				   old_count: value of count at the last
				   printed failure */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; the outputs keep the zero
     values assigned above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y_gen, head_r_true and tail_r_true hold two values per element */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation; the x and y work buffers are sized for
     the largest stride used below (|inc| = 2) */
  head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (float *) blas_malloc(max_mn * sizeof(float));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must see the same random sequence as the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* for alpha_val == 2 alpha is not assigned here and alpha_flag
         stays 0 -- presumably testgen then chooses alpha; confirm
         against BLAS_cgemv2_s_s_testgen */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta (same scheme as alpha) */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; eps_int and un_int are the epsilon and
	   underflow threshold handed to the checker for that precision */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_S);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
	    prec = blas_prec_single;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions too small for this order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen
		       (m > 0 && n > 0 always holds here because of the
		       early return above) */
		    if (m > 0 && n > 0)
		      BLAS_cgemv2_s_s_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* count identifies the current generated data set */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;


		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy; doubled because iy below indexes
			   y in units of float, two per element */
			incy = incy_val;
			incy *= 2;

			/* y is overwritten by the call, so start from a
			   fresh copy of y_gen every time */
			ccopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_cgemv2_s_s_x */
			FPU_FIX_STOP;
			BLAS_cgemv2_s_s_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* NOTE(review): p_count starts at 0 and the test
			     below is <=, so up to max_print + 1 reports
			     can be printed */
			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    /* note: the incy printed here is the doubled
			       stride, not incy_val */
			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    sge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    sprint_vector(head_x, n_i, incx_val, "head_x");
			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
			    cprint_vector(y_gen, m_i, 1, "y_gen");
			    cprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
			    printf("\n");
			    /* one line per element of y: both parts of the
			       reference value and that element's ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up once too many tests have failed */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* give up on a hopelessly large (or NaN) ratio */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered on the first pass only */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary line.
     NOTE(review): tot_tests can still be 0 here (e.g. ntests == 0), in
     which case the quotient below is 0.0 / 0.0 */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached by the early-abort gotos above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_z_d_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_z_d_x.  For every combination of
 * the attributes listed under "Code structure", test data are produced by
 * BLAS_zgemv2_z_d_testgen, BLAS_zgemv2_z_d_x is called, and every element
 * of the computed y is checked with test_BLAS_zdot2_z_d against the
 * reference result (head_r_true, tail_r_true).  The largest per-element
 * ratio of a call is the ratio of that test.
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows
 *
 * n         (input) int
 *           The number of columns
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator.  It is passed to
 *           xrand() and to testgen, and is reset to its initial value
 *           before the second pass when debug=3.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches a NaN ratio).  (Since ratio is supposed
 *           to be O(1), we can set thresh to ~10.)
 *
 * debug     (input) int
 *           If debug=3, run the whole sweep twice and, on the second
 *                       pass, print complete info for failing tests
 *           If debug=2, print summary
 *           If debug=1, print summary only if the number of bad ratios > 0
 *           If debug=0, print nothing unless the sweep is aborted early
 *
 * test_prob (input) float
 *           A case is run only if the generated random number is less
 *           than test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum nonzero ratio seen during the first pass
 *           (left at 1e308 if there was none).
 *
 * num_bad_ratio (output) int
 *               The number of tests whose ratio is not <= thresh,
 *               counted over the last pass that was executed.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen during the first pass.
 * If m == 0 or n == 0 the function returns 0.0 at once, with
 * *min_ratio = 0.0, *num_bad_ratio = 0 and *num_tests = 0.
 * If the number of bad ratios reaches MAX_BAD_TESTS, or a ratio is not
 * <= TOTAL_FAILURE_THRESHOLD, a message is printed and the sweep stops
 * early; the values gathered so far are still returned.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is 3, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or not fixed here (alpha_flag = 0)
 *      beta loop   -- varying beta: 0, 1, or not fixed here (beta_flag = 0)
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is performed with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                                  -- (m_i is m for no_trans, n otherwise)
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_z_d_x";

  /* max number of debug lines to print */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;
  int incy_gen;			/* for complex case inc=2, for real case inc=1 */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of debug lines printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-element ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon for the precision under test */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_z_d_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_z_d_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: testgen data sets generated so far;
				   old_count: value of count at the last
				   printed failure */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; the outputs keep the zero
     values assigned above and nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  /* y_gen, head_r_true and tail_r_true hold two values per element */
  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation; the x and y work buffers are sized for
     the largest stride used below (|inc| = 2) */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
			   2);
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    /* the second pass must see the same random sequence as the first */
    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* for alpha_val == 2 alpha is not assigned here and alpha_flag
         stays 0 -- presumably testgen then chooses alpha; confirm
         against BLAS_zgemv2_z_d_testgen */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta (same scheme as alpha) */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; eps_int and un_int are the epsilon and
	   underflow threshold handed to the checker for that precision.
	   Cases 0 and 1 are identical here: both use double */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i+1, or 2*m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions too small for this order */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen
		       (m > 0 && n > 0 always holds here because of the
		       early return above) */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_z_d_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    /* count identifies the current generated data set */
		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx */
		      incx = incx_val;


		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy; doubled because iy below indexes
			   y in units of double, two per element */
			incy = incy_val;
			incy *= 2;

			/* y is overwritten by the call, so start from a
			   fresh copy of y_gen every time */
			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_z_d_x */
			FPU_FIX_STOP;
			BLAS_zgemv2_z_d_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    zge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  /* NOTE(review): p_count starts at 0 and the test
			     below is <=, so up to max_print + 1 reports
			     can be printed */
			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    /* note: the incy printed here is the doubled
			       stride, not incy_val */
			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    zge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    dprint_vector(head_x, n_i, incx_val, "head_x");
			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* one line per element of y: both parts of the
			       reference value and that element's ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  /* give up once too many tests have failed */
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  /* give up on a hopelessly large (or NaN) ratio */
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered on the first pass only */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  /* summary line.
     NOTE(review): tot_tests can still be 0 here (e.g. ntests == 0), in
     which case the quotient below is 0.0 / 0.0 */
  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
       ratio_min, ratio_max);
  }

  /* common exit: also reached by the early-abort gotos above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_d_z_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_d_z_x.  In this variant A is
 * stored as plain doubles (it is handled with the dge_* helpers), while
 * head_x, tail_x and y are stored as interleaved (re, im) pairs (they are
 * handled with the z* helpers).
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows.  Must be >= 0.
 *
 * n         (input) int
 *           The number of columns.  Must be >= 0.
 *           If m == 0 or n == 0 the routine returns 0.0 immediately,
 *           leaving *min_ratio = 0.0, *num_bad_ratio = 0, *num_tests = 0.
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           xrand() and by the test generator.  When debug == 3 it is
 *           reset to its entry value before the second (printing) pass so
 *           that both passes see the same sequence of cases.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN).  Since the ratio is supposed to be
 *           O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run two passes and print complete information for
 *                       failing tests during the second pass
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing (apart from the abort messages)
 *
 * test_prob (input) float
 *           A generated case is run only if the random number drawn for
 *           it is < test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio seen during the first pass
 *           (remains at the 1e308 sentinel if there was none).
 *
 * num_bad_ratio (output) int
 *               The number of tests above the threshold, as counted in
 *               the last pass that was started.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen during the first pass.  If testing is aborted
 * (too many failures, or a flagrant ratio) the buffers are still released
 * and the maximum recorded so far is returned; the ratio that triggered
 * the abort has not been folded into it.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_d_z_x";

  /* limit on detailed failure reports; the check below is
     p_count <= max_print, so up to max_print + 1 reports are printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* the same strides counted in doubles */
  int incy_gen;			/* stride of y_gen / r_true in doubles (2 here) */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected for the chosen prec */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far;
				   old_count: the case last reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* flag 1: alpha is fixed here; flag 0 (alpha_val == 2): alpha is
         not assigned here and its address is handed to testgen */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as alpha_flag */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; cases 0 and 1 both select double here */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i + 1, or 2 * m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions too small for this layout */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_d_z_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx (in doubles; only used when printing) */
		      incx = incx_val;
		      incx *= 2;

		      zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy (in doubles, used to index y below) */
			incy = incy_val;
			incy *= 2;

			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_d_z_x */
			FPU_FIX_STOP;
			BLAS_zgemv2_d_z_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index; for a negative stride start
			   at the far end and walk back toward index 0 */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    dge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    zprint_vector(head_x, n_i, incx_val, "head_x");
			    zprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* j walks the (re, im) pairs of r_true,
			       k the matching per-row ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered during the first pass only */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* With debug == 2 the summary is printed even when every case was
       skipped; report 0.00 then instead of dividing 0 by 0. */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  /* common exit: also reached by the abort paths above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}
double do_test_zgemv2_d_d_x(int m, int n, int ntests, int *seed,
			    double thresh, int debug, float test_prob,
			    double *min_ratio, int *num_bad_ratio,
			    int *num_tests)

/*
 * Purpose
 * =======
 *
 * Runs a series of tests on BLAS_zgemv2_d_d_x.  In this variant A,
 * head_x and tail_x are stored as plain doubles (they are handled with
 * the dge_* and d* helpers), while y is stored as interleaved (re, im)
 * pairs (it is handled with the z* helpers).
 *
 * Arguments
 * =========
 *
 * m         (input) int
 *           The number of rows.  Must be >= 0.
 *
 * n         (input) int
 *           The number of columns.  Must be >= 0.
 *           If m == 0 or n == 0 the routine returns 0.0 immediately,
 *           leaving *min_ratio = 0.0, *num_bad_ratio = 0, *num_tests = 0.
 *
 * ntests    (input) int
 *           The number of tests to run for each set of attributes.
 *
 * seed      (input/output) int
 *           The seed for the random number generator; it is advanced by
 *           xrand() and by the test generator.  When debug == 3 it is
 *           reset to its entry value before the second (printing) pass so
 *           that both passes see the same sequence of cases.
 *
 * thresh    (input) double
 *           A test is counted as bad when its ratio is not <= thresh
 *           (this also catches NaN).  Since the ratio is supposed to be
 *           O(1), thresh can be set to ~10.
 *
 * debug     (input) int
 *           If debug=3, run two passes and print complete information for
 *                       failing tests during the second pass
 *           If debug=2, print the summary
 *           If debug=1, print the summary only if the number of bad
 *                       ratios > 0
 *           If debug=0, print nothing (apart from the abort messages)
 *
 * test_prob (input) float
 *           A generated case is run only if the random number drawn for
 *           it is < test_prob; otherwise the case is skipped.
 *
 * min_ratio (output) double
 *           The minimum non-zero ratio seen during the first pass
 *           (remains at the 1e308 sentinel if there was none).
 *
 * num_bad_ratio (output) int
 *               The number of tests above the threshold, as counted in
 *               the last pass that was started.
 *
 * num_tests (output) int
 *           The number of tests performed during the first pass.
 *
 * Return value
 * ============
 *
 * The maximum ratio seen during the first pass.  If testing is aborted
 * (too many failures, or a flagrant ratio) the buffers are still released
 * and the maximum recorded so far is returned; the ratio that triggered
 * the abort has not been folded into it.
 *
 * Code structure
 * ==============
 *
 *  debug loop  -- if debug is three, the first loop computes the max ratio
 *              -- and the last(second) loop outputs debugging information,
 *              -- if the test fail and its ratio > 0.5 * max ratio.
 *              -- otherwise the loop is executed once
 *    alpha loop  -- varying alpha: 0, 1, or random
 *      beta loop   -- varying beta: 0, 1, or random
 *        prec loop   -- varying internal prec: double, double, or extra
 *          norm loop   -- varying norm: near underflow, near one, or
 *                        -- near overflow
 *            numtest loop  -- how many times the test is perform with
 *                            -- above set of attributes
 *              order loop   -- varying order type: rowmajor or colmajor
 *                trans loop    -- varying trans type: no_trans, trans,
 *                                -- or conj_trans
 *                  lda loop      -- varying lda: m_i, m_i+1, 2*m_i
 *                    incx loop     -- varying incx: -2, -1, 1, 2
 *                      incy loop     -- varying incy: -2, -1, 1, 2
 */
{
  /* function name */
  const char fname[] = "BLAS_zgemv2_d_d_x";

  /* limit on detailed failure reports; the check below is
     p_count <= max_print, so up to max_print + 1 reports are printed */
  const int max_print = 8;

  /* Variables in the "x_val" form are loop vars for corresponding
     variables */
  int i;			/* iterate through the repeating tests */
  int j, k;			/* multipurpose counters or variables */
  int iy;			/* use to index y */
  int incx_val, incy_val,	/* for testing different inc values */
    incx, incy;			/* incx: same as incx_val; incy: in doubles */
  int incy_gen;			/* stride of y_gen / r_true in doubles (2 here) */
  int d_count;			/* counter for debug */
  int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
  int p_count;			/* counter for the number of failure reports printed */
  int tot_tests;		/* total number of tests done in the first pass */
  int norm;			/* input values of near underflow/one/overflow */
  double ratio_max;		/* the current maximum ratio */
  double ratio_min;		/* the current minimum ratio */
  double *ratios;		/* per-row ratios of the current test */
  double ratio;			/* the per-use test ratio from test() */
  int bad_ratios;		/* the number of ratios over the threshold */
  double eps_int;		/* the internal epsilon expected for the chosen prec */
  double un_int;		/* the internal underflow threshold */
  double alpha[2];
  double beta[2];
  double *A;
  double *head_x;
  double *tail_x;
  double *y;
  double *temp;			/* use for calculating ratio */

  /* x_gen and y_gen are used to store vectors generated by testgen.
     they eventually are copied back to x and y */
  double *head_x_gen;
  double *tail_x_gen;
  double *y_gen;

  /* the true r calculated by testgen(), in double-double */
  double *head_r_true, *tail_r_true;

  int alpha_val;
  int alpha_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
  int beta_val;
  int beta_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
  int order_val;
  enum blas_order_type order_type;
  int prec_val;
  enum blas_prec_type prec;
  int trans_val;
  enum blas_trans_type trans_type;
  int m_i;
  int n_i;
  int max_mn;			/* the max of m and n */
  int lda_val;
  int lda;
  int saved_seed;		/* for saving the original seed */
  int count, old_count;		/* count: generated cases so far;
				   old_count: the case last reported */

  FPU_FIX_DECL;

  /* test for bad arguments */
  if (n < 0 || m < 0 || ntests < 0)
    BLAS_error(fname, 0, 0, NULL);

  /* initialization */
  *num_bad_ratio = 0;
  *num_tests = 0;
  *min_ratio = 0.0;

  saved_seed = *seed;
  ratio_min = 1e308;
  ratio_max = 0.0;
  ratio = 0.0;
  tot_tests = 0;
  p_count = 0;
  count = 0;
  find_max_ratio = 0;
  bad_ratios = 0;
  old_count = 0;

  if (debug == 3)
    find_max_ratio = 1;
  max_mn = MAX(m, n);
  /* nothing to test for an empty matrix; nothing has been allocated yet */
  if (m == 0 || n == 0) {
    return 0.0;
  }

  FPU_FIX_START;

  incy_gen = 1;
  incy_gen *= 2;

  /* get space for calculation */
  head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && head_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
  if (max_mn * 2 > 0 && tail_x == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
  if (max_mn * 2 > 0 && y == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && head_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && tail_x_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && y_gen == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  temp = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && temp == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
  if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  ratios = (double *) blas_malloc(max_mn * sizeof(double));
  if (max_mn > 0 && ratios == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }
  A =
    (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
  if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
    BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
  }

  /* The debug iteration:
     If debug=3, then will execute the iteration twice. First, compute the
     max ratio. Second, print info if ratio > (50% * ratio_max). */
  for (d_count = 0; d_count <= find_max_ratio; d_count++) {
    bad_ratios = 0;		/* set to zero */

    if ((debug == 3) && (d_count == find_max_ratio))
      *seed = saved_seed;	/* restore the original seed */

    /* varying alpha */
    for (alpha_val = 0; alpha_val < 3; alpha_val++) {
      /* flag 1: alpha is fixed here; flag 0 (alpha_val == 2): alpha is
         not assigned here and its address is handed to testgen */
      alpha_flag = 0;
      switch (alpha_val) {
      case 0:
	alpha[0] = alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      case 1:
	alpha[0] = 1.0;
	alpha[1] = 0.0;
	alpha_flag = 1;
	break;
      }

      /* varying beta */
      for (beta_val = 0; beta_val < 3; beta_val++) {
	/* same convention as alpha_flag */
	beta_flag = 0;
	switch (beta_val) {
	case 0:
	  beta[0] = beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	case 1:
	  beta[0] = 1.0;
	  beta[1] = 0.0;
	  beta_flag = 1;
	  break;
	}


	/* varying extra precs; cases 0 and 1 both select double here */
	for (prec_val = 0; prec_val <= 2; prec_val++) {
	  switch (prec_val) {
	  case 0:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 1:
	    eps_int = power(2, -BITS_D);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
	    prec = blas_prec_double;
	    break;
	  case 2:
	  default:
	    eps_int = power(2, -BITS_E);
	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
	    prec = blas_prec_extra;
	    break;
	  }

	  /* values near underflow, 1, or overflow */
	  for (norm = -1; norm <= 1; norm++) {

	    /* number of tests */
	    for (i = 0; i < ntests; i++) {

	      /* row or col major */
	      for (order_val = 0; order_val < 2; order_val++) {
		switch (order_val) {
		case 0:
		  order_type = blas_rowmajor;
		  break;
		case 1:
		default:
		  order_type = blas_colmajor;
		  break;
		}

		/* no_trans, trans, or conj_trans;
		   m_i is the length of y and n_i the length of x */
		for (trans_val = 0; trans_val < 3; trans_val++) {
		  switch (trans_val) {
		  case 0:
		    trans_type = blas_no_trans;
		    m_i = m;
		    n_i = n;
		    break;
		  case 1:
		    trans_type = blas_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  case 2:
		  default:
		    trans_type = blas_conj_trans;
		    m_i = n;
		    n_i = m;
		    break;
		  }

		  /* lda = m_i, m_i + 1, or 2 * m_i */
		  for (lda_val = 0; lda_val < 3; lda_val++) {
		    switch (lda_val) {
		    case 0:
		      lda = m_i;
		      break;
		    case 1:
		      lda = m_i + 1;
		      break;
		    case 2:
		    default:
		      lda = 2 * m_i;
		      break;
		    }
		    /* skip leading dimensions too small for this layout */
		    if ((order_type == blas_rowmajor && lda < n) ||
			(order_type == blas_colmajor && lda < m))
		      continue;

		    /* For the sake of speed, we throw out this case at random */
		    if (xrand(seed) >= test_prob)
		      continue;

		    /* in the trivial cases, no need to run testgen */
		    if (m > 0 && n > 0)
		      BLAS_zgemv2_d_d_testgen(norm, order_type, trans_type, m,
					      n, &alpha, alpha_flag, A, lda,
					      head_x_gen, tail_x_gen, &beta,
					      beta_flag, y_gen, seed,
					      head_r_true, tail_r_true);

		    count++;

		    /* varying incx */
		    for (incx_val = -2; incx_val <= 2; incx_val++) {
		      if (incx_val == 0)
			continue;

		      /* setting incx (only used when printing) */
		      incx = incx_val;


		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);

		      /* varying incy */
		      for (incy_val = -2; incy_val <= 2; incy_val++) {
			if (incy_val == 0)
			  continue;

			/* setting incy (in doubles, used to index y below) */
			incy = incy_val;
			incy *= 2;

			zcopy_vector(y_gen, m_i, 1, y, incy_val);

			/* call BLAS_zgemv2_d_d_x */
			FPU_FIX_STOP;
			BLAS_zgemv2_d_d_x(order_type, trans_type, m, n, alpha,
					  A, lda, head_x, tail_x, incx_val,
					  beta, y, incy_val, prec);
			FPU_FIX_START;

			/* set y starting index; for a negative stride start
			   at the far end and walk back toward index 0 */
			iy = 0;
			if (incy < 0)
			  iy = -(m_i - 1) * incy;

			/* computing the ratio */
			if (m > 0 && n > 0)
			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
			    /* copy row j of A to temp */
			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
					 lda, temp, j);

			    test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha,
						beta, &y_gen[k], &y[iy],
						&head_r_true[k],
						&tail_r_true[k], temp, 1,
						head_x, tail_x, incx_val,
						eps_int, un_int, &ratios[j]);

			    /* take the max ratio */
			    if (j == 0) {
			      ratio = ratios[0];
			      /* The !<= below causes NaN error to be detected.
			         Note that (NaN > thresh) is always false. */
			    } else if (!(ratios[j] <= ratio)) {
			      ratio = ratios[j];
			    }
			    iy += incy;
			  }

			/* Increase the number of bad ratio, if the ratio
			   is bigger than the threshold.
			   The !<= below causes NaN error to be detected.
			   Note that (NaN > thresh) is always false. */
			if (!(ratio <= thresh)) {
			  bad_ratios++;

			  if ((debug == 3) &&	/* print only when debug is on */
			      (count != old_count) &&	/* print if old vector is different 
							   from the current one */
			      (d_count == find_max_ratio) &&
			      (p_count <= max_print) &&
			      (ratio > 0.5 * ratio_max)) {
			    old_count = count;

			    printf
			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
			       fname, m, n, ntests, thresh);

			    /* Print test info */
			    switch (prec) {
			    case blas_prec_single:
			      printf("single ");
			      break;
			    case blas_prec_double:
			      printf("double ");
			      break;
			    case blas_prec_indigenous:
			      printf("indigenous ");
			      break;
			    case blas_prec_extra:
			      printf("extra ");
			      break;
			    }
			    switch (norm) {
			    case -1:
			      printf("near_underflow ");
			      break;
			    case 0:
			      printf("near_one ");
			      break;
			    case 1:
			      printf("near_overflow ");
			      break;
			    }
			    switch (order_type) {
			    case blas_rowmajor:
			      printf("row_major ");
			      break;
			    case blas_colmajor:
			      printf("col_major ");
			      break;
			    }
			    switch (trans_type) {
			    case blas_no_trans:
			      printf("no_trans ");
			      break;
			    case blas_trans:
			      printf("trans ");
			      break;
			    case blas_conj_trans:
			      printf("conj_trans ");
			      break;
			    }

			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
				   incy);

			    dge_print_matrix(A, m_i, n_i, lda, order_type,
					     "A");

			    dprint_vector(head_x, n_i, incx_val, "head_x");
			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
			    zprint_vector(y_gen, m_i, 1, "y_gen");
			    zprint_vector(y, m_i, incy_val, "y_final");

			    printf("      ");
			    printf("alpha = ");
			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
			    printf("\n      ");
			    printf("beta = ");
			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
			    printf("\n");
			    /* j walks the (re, im) pairs of r_true,
			       k the matching per-row ratio */
			    for (j = 0, k = 0; j < m_i * incy_gen;
				 j += incy_gen, k++) {
			      printf("      ");
			      printf
				("([%24.16e  %24.16e], [%24.16e %24.16e])",
				 head_r_true[j], tail_r_true[j],
				 head_r_true[j + 1], tail_r_true[j + 1]);
			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
			    }

			    printf("      ratio=%.4e\n", ratio);
			    p_count++;
			  }
			  if (bad_ratios >= MAX_BAD_TESTS) {
			    printf("\ntoo many failures, exiting....");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
			    printf("\nFlagrant ratio error, exiting...");
			    printf("\nTesting and compilation");
			    printf(" are incomplete\n\n");
			    goto end;
			  }
			}
			/* statistics are gathered during the first pass only */
			if (d_count == 0) {
			  if (ratio > ratio_max)
			    ratio_max = ratio;

			  if (ratio != 0.0 && ratio < ratio_min)
			    ratio_min = ratio;

			  tot_tests++;
			}
		      }		/* incy */
		    }		/* incx */
		  }		/* lda */
		}		/* trans */
	      }			/* order */
	    }			/* tests */
	  }			/* norm */
	}			/* prec */
      }				/* beta */
    }				/* alpha */
  }				/* debug */

  if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
    /* With debug == 2 the summary is printed even when every case was
       skipped; report 0.00 then instead of dividing 0 by 0. */
    double bad_fraction = 0.0;

    if (tot_tests > 0)
      bad_fraction = ((double) bad_ratios) / ((double) tot_tests);

    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
	   fname, m, n, ntests, thresh);
    printf
      ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
       bad_ratios, tot_tests, bad_fraction, ratio_min, ratio_max);
  }

  /* common exit: also reached by the abort paths above */
end:
  FPU_FIX_STOP;

  blas_free(head_x);
  blas_free(tail_x);
  blas_free(y);
  blas_free(head_x_gen);
  blas_free(tail_x_gen);
  blas_free(y_gen);
  blas_free(temp);
  blas_free(A);
  blas_free(head_r_true);
  blas_free(tail_r_true);
  blas_free(ratios);

  *min_ratio = ratio_min;
  *num_bad_ratio = bad_ratios;
  *num_tests = tot_tests;
  return ratio_max;
}

#define NUMPAIRS 12

/*
 * RUN_GEMV2_TEST(routine)
 *
 * Runs do_test_<routine>() once for each of the first `nsizes` rows of
 * `mn_pairs`, accumulates the number of bad ratios, the number of tests
 * and the largest ratio returned, and prints one PASS/FAIL summary line
 * labelled "BLAS_<routine>".
 *
 * This is a statement macro rather than a helper function so that every
 * expansion is textually the same call the hand-expanded driver made.
 * It must be used inside main(), because it reads the locals `nsizes`,
 * `mn_pairs`, `seed`, `thresh`, `debug` and `test_prob`, and updates
 * `nr_routines` and `nr_failed_routines`.
 *
 * NOTE(review): the third argument of do_test_<routine>() is the literal
 * 1, exactly as in the original driver, so the <ntests> command-line
 * value is only validated and echoed -- confirm whether it should be
 * forwarded instead.
 */
#define RUN_GEMV2_TEST(routine)                                            \
  do {                                                                     \
    const char *fname = "BLAS_" #routine;                                  \
    double max_ratio = 0.0;                                                \
    int total_bad_ratios = 0;                                              \
    int total_tests = 0;                                                   \
    int i;                                                                 \
                                                                           \
    printf("Testing %s...\n", fname);                                      \
    for (i = 0; i < nsizes; i++) {                                         \
      /* size_min_ratio is a required output of the callee; the driver */  \
      /* never reports a minimum, so it is received and ignored.       */  \
      double size_min_ratio, size_max_ratio;                               \
      int num_bad_ratio, num_tests;                                        \
                                                                           \
      size_max_ratio =                                                     \
        do_test_##routine(mn_pairs[i][0], mn_pairs[i][1], 1, &seed,        \
                          thresh, debug, test_prob, &size_min_ratio,       \
                          &num_bad_ratio, &num_tests);                     \
      if (size_max_ratio > max_ratio)                                      \
        max_ratio = size_max_ratio;                                        \
                                                                           \
      total_bad_ratios += num_bad_ratio;                                   \
      total_tests += num_tests;                                            \
    }                                                                      \
                                                                           \
    nr_routines++;                                                         \
    if (total_bad_ratios == 0)                                             \
      printf("PASS> ");                                                    \
    else {                                                                 \
      nr_failed_routines++;                                                \
      printf("FAIL> ");                                                    \
    }                                                                      \
                                                                           \
    printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",               \
           fname, total_bad_ratios, total_tests, max_ratio);               \
  } while (0)

/*
 * Test driver for the GEMV2 family.
 *
 * Command line: <nsizes> <ntests> <thresh> <debug> <test_prob>
 *
 * For every routine variant listed below, the first <nsizes> (m, n)
 * pairs of the built-in size table are tested and a PASS/FAIL line is
 * printed; a final line reports how many routines failed.
 *
 * Returns -1 when the argument count is wrong (after printing the usage
 * text) and 0 otherwise -- including when some routines fail, which is
 * the original behaviour and is kept for callers that rely on it.
 */
int main(int argc, char **argv)
{
  int nsizes, ntests, debug;
  double thresh, test_prob;
  int seed;
  int nr_failed_routines = 0, nr_routines = 0;
  const char *base_routine = "gemv2";

  /* (m, n) problem sizes; row i is used by the i-th size iteration. */
  int mn_pairs[NUMPAIRS][2] = {
    {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {2, 1},
    {3, 1}, {2, 3}, {3, 3}, {2, 4}, {6, 6}, {10, 8}
  };

  if (argc != 6) {
    printf("Usage:\n");
    printf("do_test_gemv2 <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
    printf("   <nsizes>: number of sizes to be run.\n");
    printf
      ("   <ntests>: the number of tests performed for each set of attributes\n");
    printf
      ("   <thresh>: to catch bad ratios if it is greater than <thresh>\n");
    printf("    <debug>: 0, 1, 2, or 3; \n");
    printf("        if 0, no printing \n");
    printf("        if 1, print error summary only if tests fail\n");
    printf("        if 2, print error summary for each n\n");
    printf("        if 3, print complete info each test fails \n");
    printf("<test_prob>: probability of preforming a given \n");
    printf("           test case: 0.0 does no tests, 1.0 does all tests\n");
    return -1;
  }

  nsizes = atoi(argv[1]);
  ntests = atoi(argv[2]);
  thresh = atof(argv[3]);
  debug = atoi(argv[4]);
  test_prob = atof(argv[5]);

  seed = 1999;

  if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
    BLAS_error("Testing gemv2", 0, 0, NULL);

  /* mn_pairs has only NUMPAIRS rows; a larger request would index past
   * the end of the table, so it is limited to what is available. */
  if (nsizes > NUMPAIRS) {
    printf("NOTE: nsizes = %d exceeds the %d built-in sizes; using %d.\n",
           nsizes, NUMPAIRS, NUMPAIRS);
    nsizes = NUMPAIRS;
  }

  printf("Testing %s...\n", base_routine);
  printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
         nsizes, ntests, thresh, debug);

  /* Plain and mixed-type variants. */
  RUN_GEMV2_TEST(sgemv2);
  RUN_GEMV2_TEST(dgemv2);
  RUN_GEMV2_TEST(cgemv2);
  RUN_GEMV2_TEST(zgemv2);
  RUN_GEMV2_TEST(dgemv2_d_s);
  RUN_GEMV2_TEST(dgemv2_s_d);
  RUN_GEMV2_TEST(dgemv2_s_s);
  RUN_GEMV2_TEST(zgemv2_z_c);
  RUN_GEMV2_TEST(zgemv2_c_z);
  RUN_GEMV2_TEST(zgemv2_c_c);
  RUN_GEMV2_TEST(cgemv2_c_s);
  RUN_GEMV2_TEST(cgemv2_s_c);
  RUN_GEMV2_TEST(cgemv2_s_s);
  RUN_GEMV2_TEST(zgemv2_z_d);
  RUN_GEMV2_TEST(zgemv2_d_z);
  RUN_GEMV2_TEST(zgemv2_d_d);

  /* The same variants with the "_x" suffix. */
  RUN_GEMV2_TEST(sgemv2_x);
  RUN_GEMV2_TEST(dgemv2_x);
  RUN_GEMV2_TEST(cgemv2_x);
  RUN_GEMV2_TEST(zgemv2_x);
  RUN_GEMV2_TEST(dgemv2_d_s_x);
  RUN_GEMV2_TEST(dgemv2_s_d_x);
  RUN_GEMV2_TEST(dgemv2_s_s_x);
  RUN_GEMV2_TEST(zgemv2_z_c_x);
  RUN_GEMV2_TEST(zgemv2_c_z_x);
  RUN_GEMV2_TEST(zgemv2_c_c_x);
  RUN_GEMV2_TEST(cgemv2_c_s_x);
  RUN_GEMV2_TEST(cgemv2_s_c_x);
  RUN_GEMV2_TEST(cgemv2_s_s_x);
  RUN_GEMV2_TEST(zgemv2_z_d_x);
  RUN_GEMV2_TEST(zgemv2_d_z_x);
  RUN_GEMV2_TEST(zgemv2_d_d_x);

  printf("\n");
  if (nr_failed_routines)
    printf("FAILED ");
  else
    printf("PASSED ");
  printf("%-10s: FAIL/TOTAL = %d/%d\n",
         base_routine, nr_failed_routines, nr_routines);

  return 0;
}

#undef RUN_GEMV2_TEST

