dnl --------------------------------------------------------- dnl TBSV ---- Triangular Banded Solve dnl dnl x <--- alpha * T^-1 * x dnl dnl where matrix T is a triangular band matrix. dnl --------------------------------------------------------- dnl #include #include "blas_extended.h" #include "blas_extended_private.h" include(cblas.m4)dnl include(tbsv-common.m4)dnl dnl dnl dnl Usage: dnl TBSV ($1, $2, $3) dnl TBSV_HEAD ($1, $2, $3) dnl TBSV_NAME ($1, $2, $3) dnl TBSV_PARAMS ($1, $2, $3) dnl TBSV_COMMENT($1, $2, $3) dnl dnl $1 -- type of alpha, x. dnl $2 -- type of T matrix dnl $3 -- set to `_x' for _x routines. dnl Otherwise, `'. dnl define(`TBSV_COMMENT',` /* * Purpose * ======= * * This routine solves : * * x <- alpha * inverse(t) * x * * Arguments * ========= * * order (input) enum blas_order_type * column major, row major (blas_rowmajor, blas_colmajor) * * uplo (input) enum blas_uplo_type * upper, lower (blas_upper, blas_lower) * * trans (input) enum blas_trans_type * no trans, trans, conj trans * * diag (input) enum blas_diag_type * unit, non unit (blas_unit_diag, blas_non_unit_diag) * * n (input) int * the dimension of t * * k (input) int * the number of subdiagonals/superdiagonals of t * * alpha (input) $1_scalar * * t (input) $2_array * Triangular Banded matrix * * x (input) const $1_array * Array of length n. * * incx (input) int * The stride used to access components x[i]. * PREC_COMMENT($3)dnl */')dnl dnl dnl dnl -------------------------------------------------------------------- dnl Usage: TBSV_INIT(ax_typeltr, T_typeltr, _x) dnl declare TBSV local variables, and check inputs dnl -------------------------------------------------------------------- dnl define(`TBSV_INIT', ` int i, j; /* used to keep track of loop counts */ int xi; /* used to `index' vector x */ int start_xi; /* used as the starting idx to vector x */ int incxi; int Tij; /* `index' inside of Banded structure */ int dot_start, dot_start_inc1, dot_start_inc2, dot_inc; PTR_CAST(t, $2_type, `const') /* internal matrix t */ PTR_CAST(x, $1_type, `') /* internal x */ SCALAR_CAST(alpha, $1_type) /* internal alpha */ if (order != blas_rowmajor && order != blas_colmajor) { BLAS_error(routine_name, -1, order, 0); } if (uplo != blas_upper && uplo != blas_lower) { BLAS_error(routine_name, -2, uplo, 0); } if ((trans != blas_trans) && (trans != blas_no_trans) && (trans != blas_conj) && (trans != blas_conj_trans)) { BLAS_error(routine_name, -2, uplo, 0); } if (diag != blas_non_unit_diag && diag != blas_unit_diag) { BLAS_error(routine_name, -4, diag, 0); } if (n < 0) { BLAS_error(routine_name, -5, n, 0); } if (k >= n) { BLAS_error(routine_name, -6, k, 0); } if ((ldt < 1) || (ldt <= k)) { BLAS_error(routine_name, -9, ldt, 0); } if (incx == 0) { BLAS_error(routine_name, -11, incx, 0); } if (n <= 0) return; incxi = incx; INC_ADJUST(incxi, $1_type) /* configuring the vector starting idx */ if (incxi < 0) { start_xi = (1-n)*incxi; } else { start_xi = 0; } /* if alpha is zero, then return x as a zero vector */ if (TEST_0(alpha_i, $1_type)){ xi = start_xi; for(i=0; i0; i--){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) GET_VECTOR_ELEMENT(x_elem, x_i, xi, $1) MUL(temp2, $3, x_elem, $1, T_element, $2) SUB(temp1, $3, temp1, $3, temp2, $3) xi += incxi; Tij += dot_inc; } /* for across row */ /* if the diagonal entry is not equal to one, then divide Xj by the entry */ if (diag == blas_non_unit_diag){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) DIV(temp1, $3, temp1, $3, T_element, $2) } /* if (diag == blas_non_unit_diag) */ SET_ROUND_VECTOR_ELEMENT(x_i, xi, temp1, $3) xi += incxi; } /* for j0; i--){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) GET_VECTOR_ELEMENT(x_elem, x_i, xi, $1) MUL(temp2, $3, x_elem, $1, T_element, $2) SUB(temp1, $3, temp1, $3, temp2, $3) xi += incxi; Tij += dot_inc; } /* for across row */ /* if the diagonal entry is not equal to one, then divide by the entry */ if (diag == blas_non_unit_diag){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) DIV(temp1, $3, temp1, $3, T_element, $2) } /* if (diag == blas_non_unit_diag) */ SET_ROUND_VECTOR_ELEMENT(x_i, xi, temp1, $3) xi += incxi; } /* for j0; i--){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) GET_ARRAY_ELEMENT(temp3, $3, x_internal, $3, x_inti) MUL(temp2, $3, temp3, $3, T_element, $2) SUB(temp1, $3, temp1, $3, temp2, $3) x_inti += inc_x_inti; Tij += dot_inc; } /* for across row */ /* if the diagonal entry is not equal to one, then divide Xj by the entry */ if (diag == blas_non_unit_diag){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) DIV(temp1, $3, temp1, $3, T_element, $2) } /* if (diag == blas_non_unit_diag) */ /* place internal precision result in internal buffer */ SET_ARRAY_ELEMENT(temp1, $3, x_internal, $3, x_inti) /* place result x in same place as got x this loop */ SET_ROUND_VECTOR_ELEMENT(x_i, xi, temp1, $3) xi += incxi; } /* for j0 && (x_inti < k_compare); i--){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) GET_ARRAY_ELEMENT(temp3, $3, x_internal, $3, x_inti) MUL(temp2, $3, temp3, $3, T_element, $2) SUB(temp1, $3, temp1, $3, temp2, $3) x_inti += inc_x_inti; Tij += dot_inc; } /* for across row */ /*reset `index' to internal storage loop buffer.*/ x_inti = 0; for (; i>0; i--){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) GET_ARRAY_ELEMENT(temp3, $3, x_internal, $3, x_inti) MUL(temp2, $3, temp3, $3, T_element, $2) SUB(temp1, $3, temp1, $3, temp2, $3) x_inti += inc_x_inti; Tij += dot_inc; } /* for across row */ /* if the diagonal entry is not equal to one, then divide by the entry */ if (diag == blas_non_unit_diag){ GET_VECTOR_ELEMENT(T_element, t_i, Tij, $2) CONJ(T_element, $2, $4) DIV(temp1, $3, temp1, $3, T_element, $2) } /* if (diag == blas_non_unit_diag) */ /* place internal precision result in internal buffer */ SET_ARRAY_ELEMENT(temp1, $3, x_internal, $3, x_inti) x_inti += inc_x_inti; if(x_inti >= k_compare) x_inti = 0; /* place result x in same place as got x this loop */ SET_ROUND_VECTOR_ELEMENT(x_i, xi, temp1, $3) xi += incxi; } /* for j