[] add metering mode to CLI

author: shmel1k <shmel1k@ydb.tech> 2022-09-02 12:44:59 +0300
committer: shmel1k <shmel1k@ydb.tech> 2022-09-02 12:44:59 +0300
commit: 90d450f74722da7859d6f510a869f6c6908fd12f (patch)
tree: 538c718dedc76cdfe37ad6d01ff250dd930d9278 /contrib/libs/clapack/zgbtrf.c
parent: 01f64c1ecd0d4ffa9e3a74478335f1745f26cc75 (diff)
download: ydb-90d450f74722da7859d6f510a869f6c6908fd12f.tar.gz
1 files changed, 605 insertions, 0 deletions
diff --git a/contrib/libs/clapack/zgbtrf.c b/contrib/libs/clapack/zgbtrf.c
new file mode 100644
index 0000000000..2e5c495bde
--- /dev/null
+++ b/contrib/libs/clapack/zgbtrf.c
@@ -0,0 +1,605 @@
+/* zgbtrf.f -- translated by f2c (version 20061008).
+   You must link the resulting object file with libf2c:
+	on Microsoft Windows system, link with libf2c.lib;
+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+	or, if you install libf2c.a in a standard place, with -lf2c -lm
+	-- in that order, at the end of the command line, as in
+		cc *.o -lf2c -lm
+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+		http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/* Table of constant values */
+
+static doublecomplex c_b1 = {1.,0.};
+static integer c__1 = 1;
+static integer c__65 = 65;
+
+/* Subroutine */ int zgbtrf_(integer *m, integer *n, integer *kl, integer *ku, 
+	 doublecomplex *ab, integer *ldab, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, 
+	    kv, nw;
+    doublecomplex temp;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *, 
+	    doublecomplex *, integer *), zgemm_(char *, char *, integer *, 
+	    integer *, integer *, doublecomplex *, doublecomplex *, integer *, 
+	     doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
+	    integer *);
+    doublecomplex work13[4160]	/* was [65][64] */, work31[4160]	/* 
+	    was [65][64] */;
+    extern /* Subroutine */ int zgeru_(integer *, integer *, doublecomplex *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *, 
+	    doublecomplex *, integer *), zcopy_(integer *, doublecomplex *, 
+	    integer *, doublecomplex *, integer *), zswap_(integer *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *), ztrsm_(
+	    char *, char *, char *, char *, integer *, integer *, 
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
+	    integer *), zgbtf2_(integer *, 
+	    integer *, integer *, integer *, doublecomplex *, integer *, 
+	    integer *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *), izamax_(integer *, 
+	    doublecomplex *, integer *);
+    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, 
+	     integer *, integer *, integer *, integer *);
+
+
+/*  -- LAPACK routine (version 3.2) -- */
+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/*     November 2006 */
+
+/*     .. Scalar Arguments .. */
+/*     .. */
+/*     .. Array Arguments .. */
+/*     .. */
+
+/*  Purpose */
+/*  ======= */
+
+/*  ZGBTRF computes an LU factorization of a complex m-by-n band matrix A */
+/*  using partial pivoting with row interchanges. */
+
+/*  This is the blocked version of the algorithm, calling Level 3 BLAS. */
+
+/*  Arguments */
+/*  ========= */
+
+/*  M       (input) INTEGER */
+/*          The number of rows of the matrix A.  M >= 0. */
+
+/*  N       (input) INTEGER */
+/*          The number of columns of the matrix A.  N >= 0. */
+
+/*  KL      (input) INTEGER */
+/*          The number of subdiagonals within the band of A.  KL >= 0. */
+
+/*  KU      (input) INTEGER */
+/*          The number of superdiagonals within the band of A.  KU >= 0. */
+
+/*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N) */
+/*          On entry, the matrix A in band storage, in rows KL+1 to */
+/*          2*KL+KU+1; rows 1 to KL of the array need not be set. */
+/*          The j-th column of A is stored in the j-th column of the */
+/*          array AB as follows: */
+/*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */
+
+/*          On exit, details of the factorization: U is stored as an */
+/*          upper triangular band matrix with KL+KU superdiagonals in */
+/*          rows 1 to KL+KU+1, and the multipliers used during the */
+/*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
+/*          See below for further details. */
+
+/*  LDAB    (input) INTEGER */
+/*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+
+/*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
+/*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
+/*          matrix was interchanged with row IPIV(i). */
+
+/*  INFO    (output) INTEGER */
+/*          = 0: successful exit */
+/*          < 0: if INFO = -i, the i-th argument had an illegal value */
+/*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
+/*               has been completed, but the factor U is exactly */
+/*               singular, and division by zero will occur if it is used */
+/*               to solve a system of equations. */
+
+/*  Further Details */
+/*  =============== */
+
+/*  The band storage scheme is illustrated by the following example, when */
+/*  M = N = 6, KL = 2, KU = 1: */
+
+/*  On entry:                       On exit: */
+
+/*      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
+/*      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
+/*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
+/*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
+/*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
+/*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
+
+/*  Array elements marked * are not used by the routine; elements marked */
+/*  + need not be set on entry, but are required by the routine to store */
+/*  elements of U because of fill-in resulting from the row interchanges. */
+
+/*  ===================================================================== */
+
+/*     .. Parameters .. */
+/*     .. */
+/*     .. Local Scalars .. */
+/*     .. */
+/*     .. Local Arrays .. */
+/*     .. */
+/*     .. External Functions .. */
+/*     .. */
+/*     .. External Subroutines .. */
+/*     .. */
+/*     .. Intrinsic Functions .. */
+/*     .. */
+/*     .. Executable Statements .. */
+
+/*     KV is the number of superdiagonals in the factor U, allowing for */
+/*     fill-in */
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1;
+    ab -= ab_offset;
+    --ipiv;
+
+    /* Function Body */
+    kv = *ku + *kl;
+
+/*     Test the input parameters. */
+
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*ldab < *kl + kv + 1) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGBTRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment */
+
+    nb = ilaenv_(&c__1, "ZGBTRF", " ", m, n, kl, ku);
+
+/*     The block size must not exceed the limit set by the size of the */
+/*     local arrays WORK13 and WORK31. */
+
+    nb = min(nb,64);
+
+    if (nb <= 1 || nb > *kl) {
+
+/*        Use unblocked code */
+
+	zgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code */
+
+/*        Zero the superdiagonal elements of the work array WORK13 */
+
+	i__1 = nb;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j - 1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * 65 - 66;
+		work13[i__3].r = 0., work13[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+/*        Zero the subdiagonal elements of the work array WORK31 */
+
+	i__1 = nb;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = nb;
+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * 65 - 66;
+		work31[i__3].r = 0., work31[i__3].i = 0.;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+/*        Gaussian elimination with partial pivoting */
+
+/*        Set fill-in elements in columns KU+2 to KV to zero */
+
+	i__1 = min(kv,*n);
+	for (j = *ku + 2; j <= i__1; ++j) {
+	    i__2 = *kl;
+	    for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * ab_dim1;
+		ab[i__3].r = 0., ab[i__3].i = 0.;
+/* L50: */
+	    }
+/* L60: */
+	}
+
+/*        JU is the index of the last column affected by the current */
+/*        stage of the factorization */
+
+	ju = 1;
+
+	i__1 = min(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = min(*m,*n) - j + 1;
+	    jb = min(i__3,i__4);
+
+/*           The active part of the matrix is partitioned */
+
+/*              A11   A12   A13 */
+/*              A21   A22   A23 */
+/*              A31   A32   A33 */
+
+/*           Here A11, A21 and A31 denote the current block of JB columns */
+/*           which is about to be factorized. The number of rows in the */
+/*           partitioning are JB, I2, I3 respectively, and the numbers */
+/*           of columns are JB, J2, J3. The superdiagonal elements of A13 */
+/*           and the subdiagonal elements of A31 lie outside the band. */
+
+/* Computing MIN */
+	    i__3 = *kl - jb, i__4 = *m - j - jb + 1;
+	    i2 = min(i__3,i__4);
+/* Computing MIN */
+	    i__3 = jb, i__4 = *m - j - *kl + 1;
+	    i3 = min(i__3,i__4);
+
+/*           J2 and J3 are computed after JU has been updated. */
+
+/*           Factorize the current block of JB columns */
+
+	    i__3 = j + jb - 1;
+	    for (jj = j; jj <= i__3; ++jj) {
+
+/*              Set fill-in elements in column JJ+KV to zero */
+
+		if (jj + kv <= *n) {
+		    i__4 = *kl;
+		    for (i__ = 1; i__ <= i__4; ++i__) {
+			i__5 = i__ + (jj + kv) * ab_dim1;
+			ab[i__5].r = 0., ab[i__5].i = 0.;
+/* L70: */
+		    }
+		}
+
+/*              Find pivot and test for singularity. KM is the number of */
+/*              subdiagonal elements in the current column. */
+
+/* Computing MIN */
+		i__4 = *kl, i__5 = *m - jj;
+		km = min(i__4,i__5);
+		i__4 = km + 1;
+		jp = izamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
+		ipiv[jj] = jp + jj - j;
+		i__4 = kv + jp + jj * ab_dim1;
+		if (ab[i__4].r != 0. || ab[i__4].i != 0.) {
+/* Computing MAX */
+/* Computing MIN */
+		    i__6 = jj + *ku + jp - 1;
+		    i__4 = ju, i__5 = min(i__6,*n);
+		    ju = max(i__4,i__5);
+		    if (jp != 1) {
+
+/*                    Apply interchange to columns J to J+JB-1 */
+
+			if (jp + jj - 1 < j + *kl) {
+
+			    i__4 = *ldab - 1;
+			    i__5 = *ldab - 1;
+			    zswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
+				    i__4, &ab[kv + jp + jj - j + j * ab_dim1], 
+				     &i__5);
+			} else {
+
+/*                       The interchange affects columns J to JJ-1 of A31 */
+/*                       which are stored in the work array WORK31 */
+
+			    i__4 = jj - j;
+			    i__5 = *ldab - 1;
+			    zswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], 
+				    &i__5, &work31[jp + jj - j - *kl - 1], &
+				    c__65);
+			    i__4 = j + jb - jj;
+			    i__5 = *ldab - 1;
+			    i__6 = *ldab - 1;
+			    zswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
+				    ab[kv + jp + jj * ab_dim1], &i__6);
+			}
+		    }
+
+/*                 Compute multipliers */
+
+		    z_div(&z__1, &c_b1, &ab[kv + 1 + jj * ab_dim1]);
+		    zscal_(&km, &z__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
+
+/*                 Update trailing submatrix within the band and within */
+/*                 the current block. JM is the index of the last column */
+/*                 which needs to be updated. */
+
+/* Computing MIN */
+		    i__4 = ju, i__5 = j + jb - 1;
+		    jm = min(i__4,i__5);
+		    if (jm > jj) {
+			i__4 = jm - jj;
+			z__1.r = -1., z__1.i = -0.;
+			i__5 = *ldab - 1;
+			i__6 = *ldab - 1;
+			zgeru_(&km, &i__4, &z__1, &ab[kv + 2 + jj * ab_dim1], 
+				&c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
+				ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
+		    }
+		} else {
+
+/*                 If pivot is zero, set INFO to the index of the pivot */
+/*                 unless a zero pivot has already been found. */
+
+		    if (*info == 0) {
+			*info = jj;
+		    }
+		}
+
+/*              Copy current column of A31 into the work array WORK31 */
+
+/* Computing MIN */
+		i__4 = jj - j + 1;
+		nw = min(i__4,i3);
+		if (nw > 0) {
+		    zcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
+			    c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
+		}
+/* L80: */
+	    }
+	    if (j + jb <= *n) {
+
+/*              Apply the row interchanges to the other blocks. */
+
+/* Computing MIN */
+		i__3 = ju - j + 1;
+		j2 = min(i__3,kv) - jb;
+/* Computing MAX */
+		i__3 = 0, i__4 = ju - j - kv + 1;
+		j3 = max(i__3,i__4);
+
+/*              Use ZLASWP to apply the row interchanges to A12, A22, and */
+/*              A32. */
+
+		i__3 = *ldab - 1;
+		zlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
+			c__1, &jb, &ipiv[j], &c__1);
+
+/*              Adjust the pivot indices. */
+
+		i__3 = j + jb - 1;
+		for (i__ = j; i__ <= i__3; ++i__) {
+		    ipiv[i__] = ipiv[i__] + j - 1;
+/* L90: */
+		}
+
+/*              Apply the row interchanges to A13, A23, and A33 */
+/*              columnwise. */
+
+		k2 = j - 1 + jb + j2;
+		i__3 = j3;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    jj = k2 + i__;
+		    i__4 = j + jb - 1;
+		    for (ii = j + i__ - 1; ii <= i__4; ++ii) {
+			ip = ipiv[ii];
+			if (ip != ii) {
+			    i__5 = kv + 1 + ii - jj + jj * ab_dim1;
+			    temp.r = ab[i__5].r, temp.i = ab[i__5].i;
+			    i__5 = kv + 1 + ii - jj + jj * ab_dim1;
+			    i__6 = kv + 1 + ip - jj + jj * ab_dim1;
+			    ab[i__5].r = ab[i__6].r, ab[i__5].i = ab[i__6].i;
+			    i__5 = kv + 1 + ip - jj + jj * ab_dim1;
+			    ab[i__5].r = temp.r, ab[i__5].i = temp.i;
+			}
+/* L100: */
+		    }
+/* L110: */
+		}
+
+/*              Update the relevant part of the trailing submatrix */
+
+		if (j2 > 0) {
+
+/*                 Update A12 */
+
+		    i__3 = *ldab - 1;
+		    i__4 = *ldab - 1;
+		    ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, 
+			    &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + 
+			    1 - jb + (j + jb) * ab_dim1], &i__4);
+
+		    if (i2 > 0) {
+
+/*                    Update A22 */
+
+			z__1.r = -1., z__1.i = -0.;
+			i__3 = *ldab - 1;
+			i__4 = *ldab - 1;
+			i__5 = *ldab - 1;
+			zgemm_("No transpose", "No transpose", &i2, &j2, &jb, 
+				&z__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3, 
+				&ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, 
+				&c_b1, &ab[kv + 1 + (j + jb) * ab_dim1], &
+				i__5);
+		    }
+
+		    if (i3 > 0) {
+
+/*                    Update A32 */
+
+			z__1.r = -1., z__1.i = -0.;
+			i__3 = *ldab - 1;
+			i__4 = *ldab - 1;
+			zgemm_("No transpose", "No transpose", &i3, &j2, &jb, 
+				&z__1, work31, &c__65, &ab[kv + 1 - jb + (j + 
+				jb) * ab_dim1], &i__3, &c_b1, &ab[kv + *kl + 
+				1 - jb + (j + jb) * ab_dim1], &i__4);
+		    }
+		}
+
+		if (j3 > 0) {
+
+/*                 Copy the lower triangle of A13 into the work array */
+/*                 WORK13 */
+
+		    i__3 = j3;
+		    for (jj = 1; jj <= i__3; ++jj) {
+			i__4 = jb;
+			for (ii = jj; ii <= i__4; ++ii) {
+			    i__5 = ii + jj * 65 - 66;
+			    i__6 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
+			    work13[i__5].r = ab[i__6].r, work13[i__5].i = ab[
+				    i__6].i;
+/* L120: */
+			}
+/* L130: */
+		    }
+
+/*                 Update A13 in the work array */
+
+		    i__3 = *ldab - 1;
+		    ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, 
+			    &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &
+			    c__65);
+
+		    if (i2 > 0) {
+
+/*                    Update A23 */
+
+			z__1.r = -1., z__1.i = -0.;
+			i__3 = *ldab - 1;
+			i__4 = *ldab - 1;
+			zgemm_("No transpose", "No transpose", &i2, &j3, &jb, 
+				&z__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3, 
+				work13, &c__65, &c_b1, &ab[jb + 1 + (j + kv) *
+				 ab_dim1], &i__4);
+		    }
+
+		    if (i3 > 0) {
+
+/*                    Update A33 */
+
+			z__1.r = -1., z__1.i = -0.;
+			i__3 = *ldab - 1;
+			zgemm_("No transpose", "No transpose", &i3, &j3, &jb, 
+				&z__1, work31, &c__65, work13, &c__65, &c_b1, 
+				&ab[*kl + 1 + (j + kv) * ab_dim1], &i__3);
+		    }
+
+/*                 Copy the lower triangle of A13 back into place */
+
+		    i__3 = j3;
+		    for (jj = 1; jj <= i__3; ++jj) {
+			i__4 = jb;
+			for (ii = jj; ii <= i__4; ++ii) {
+			    i__5 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
+			    i__6 = ii + jj * 65 - 66;
+			    ab[i__5].r = work13[i__6].r, ab[i__5].i = work13[
+				    i__6].i;
+/* L140: */
+			}
+/* L150: */
+		    }
+		}
+	    } else {
+
+/*              Adjust the pivot indices. */
+
+		i__3 = j + jb - 1;
+		for (i__ = j; i__ <= i__3; ++i__) {
+		    ipiv[i__] = ipiv[i__] + j - 1;
+/* L160: */
+		}
+	    }
+
+/*           Partially undo the interchanges in the current block to */
+/*           restore the upper triangular form of A31 and copy the upper */
+/*           triangle of A31 back into place */
+
+	    i__3 = j;
+	    for (jj = j + jb - 1; jj >= i__3; --jj) {
+		jp = ipiv[jj] - jj + 1;
+		if (jp != 1) {
+
+/*                 Apply interchange to columns J to JJ-1 */
+
+		    if (jp + jj - 1 < j + *kl) {
+
+/*                    The interchange does not affect A31 */
+
+			i__4 = jj - j;
+			i__5 = *ldab - 1;
+			i__6 = *ldab - 1;
+			zswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
+				i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
+				i__6);
+		    } else {
+
+/*                    The interchange does affect A31 */
+
+			i__4 = jj - j;
+			i__5 = *ldab - 1;
+			zswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
+				i__5, &work31[jp + jj - j - *kl - 1], &c__65);
+		    }
+		}
+
+/*              Copy the current column of A31 back into place */
+
+/* Computing MIN */
+		i__4 = i3, i__5 = jj - j + 1;
+		nw = min(i__4,i__5);
+		if (nw > 0) {
+		    zcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
+			    kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
+		}
+/* L170: */
+	    }
+/* L180: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZGBTRF */
+
+} /* zgbtrf_ */
author	shmel1k <shmel1k@ydb.tech>	2022-09-02 12:44:59 +0300
committer	shmel1k <shmel1k@ydb.tech>	2022-09-02 12:44:59 +0300
commit	90d450f74722da7859d6f510a869f6c6908fd12f (patch)
tree	538c718dedc76cdfe37ad6d01ff250dd930d9278 /contrib/libs/clapack/zgbtrf.c
parent	01f64c1ecd0d4ffa9e3a74478335f1745f26cc75 (diff)
download	ydb-90d450f74722da7859d6f510a869f6c6908fd12f.tar.gz