diff options
author | shmel1k <shmel1k@ydb.tech> | 2022-09-02 12:44:59 +0300 |
---|---|---|
committer | shmel1k <shmel1k@ydb.tech> | 2022-09-02 12:44:59 +0300 |
commit | 90d450f74722da7859d6f510a869f6c6908fd12f (patch) | |
tree | 538c718dedc76cdfe37ad6d01ff250dd930d9278 /contrib/libs/clapack/dgbtrf.c | |
parent | 01f64c1ecd0d4ffa9e3a74478335f1745f26cc75 (diff) | |
download | ydb-90d450f74722da7859d6f510a869f6c6908fd12f.tar.gz |
[] add metering mode to CLI
Diffstat (limited to 'contrib/libs/clapack/dgbtrf.c')
-rw-r--r-- | contrib/libs/clapack/dgbtrf.c | 588 |
1 files changed, 588 insertions, 0 deletions
diff --git a/contrib/libs/clapack/dgbtrf.c b/contrib/libs/clapack/dgbtrf.c new file mode 100644 index 0000000000..7b90c9bc8e --- /dev/null +++ b/contrib/libs/clapack/dgbtrf.c @@ -0,0 +1,588 @@ +/* dgbtrf.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__65 = 65; +static doublereal c_b18 = -1.; +static doublereal c_b31 = 1.; + +/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, + doublereal *ab, integer *ldab, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + + /* Local variables */ + integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, + kv, nw; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * +, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_( + integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer * +); + doublereal work13[4160] /* was [65][64] */, work31[4160] /* + was [65][64] */; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dgbtf2_( + integer *, integer *, integer *, integer *, doublereal *, integer + *, integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* -- LAPACK routine (version 3.2) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DGBTRF computes an LU factorization of a real m-by-n band matrix A */ +/* using partial pivoting with row interchanges. */ + +/* This is the blocked version of the algorithm, calling Level 3 BLAS. */ + +/* Arguments */ +/* ========= */ + +/* M (input) INTEGER */ +/* The number of rows of the matrix A. M >= 0. */ + +/* N (input) INTEGER */ +/* The number of columns of the matrix A. N >= 0. */ + +/* KL (input) INTEGER */ +/* The number of subdiagonals within the band of A. KL >= 0. */ + +/* KU (input) INTEGER */ +/* The number of superdiagonals within the band of A. KU >= 0. */ + +/* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ +/* On entry, the matrix A in band storage, in rows KL+1 to */ +/* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ +/* The j-th column of A is stored in the j-th column of the */ +/* array AB as follows: */ +/* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ + +/* On exit, details of the factorization: U is stored as an */ +/* upper triangular band matrix with KL+KU superdiagonals in */ +/* rows 1 to KL+KU+1, and the multipliers used during the */ +/* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ +/* See below for further details. */ + +/* LDAB (input) INTEGER */ +/* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ + +/* IPIV (output) INTEGER array, dimension (min(M,N)) */ +/* The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* matrix was interchanged with row IPIV(i). */ + +/* INFO (output) INTEGER */ +/* = 0: successful exit */ +/* < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ +/* has been completed, but the factor U is exactly */ +/* singular, and division by zero will occur if it is used */ +/* to solve a system of equations. */ + +/* Further Details */ +/* =============== */ + +/* The band storage scheme is illustrated by the following example, when */ +/* M = N = 6, KL = 2, KU = 1: */ + +/* On entry: On exit: */ + +/* * * * + + + * * * u14 u25 u36 */ +/* * * + + + + * * u13 u24 u35 u46 */ +/* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ +/* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ +/* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ +/* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ + +/* Array elements marked * are not used by the routine; elements marked */ +/* + need not be set on entry, but are required by the routine to store */ +/* elements of U because of fill-in resulting from the row interchanges. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* KV is the number of superdiagonals in the factor U, allowing for */ +/* fill-in */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1; + ab -= ab_offset; + --ipiv; + + /* Function Body */ + kv = *ku + *kl; + +/* Test the input parameters. */ + + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*ldab < *kl + kv + 1) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGBTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment */ + + nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku); + +/* The block size must not exceed the limit set by the size of the */ +/* local arrays WORK13 and WORK31. */ + + nb = min(nb,64); + + if (nb <= 1 || nb > *kl) { + +/* Use unblocked code */ + + dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); + } else { + +/* Use blocked code */ + +/* Zero the superdiagonal elements of the work array WORK13 */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work13[i__ + j * 65 - 66] = 0.; +/* L10: */ + } +/* L20: */ + } + +/* Zero the subdiagonal elements of the work array WORK31 */ + + i__1 = nb; + for (j = 1; j <= i__1; ++j) { + i__2 = nb; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work31[i__ + j * 65 - 66] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Gaussian elimination with partial pivoting */ + +/* Set fill-in elements in columns KU+2 to KV to zero */ + + i__1 = min(kv,*n); + for (j = *ku + 2; j <= i__1; ++j) { + i__2 = *kl; + for (i__ = kv - j + 2; i__ <= i__2; ++i__) { + ab[i__ + j * ab_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + +/* JU is the index of the last column affected by the current */ +/* stage of the factorization */ + + ju = 1; + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = min(*m,*n) - j + 1; + jb = min(i__3,i__4); + +/* The active part of the matrix is partitioned */ + +/* A11 A12 A13 */ +/* A21 A22 A23 */ +/* A31 A32 A33 */ + +/* Here A11, A21 and A31 denote the current block of JB columns */ +/* which is about to be factorized. The number of rows in the */ +/* partitioning are JB, I2, I3 respectively, and the numbers */ +/* of columns are JB, J2, J3. The superdiagonal elements of A13 */ +/* and the subdiagonal elements of A31 lie outside the band. */ + +/* Computing MIN */ + i__3 = *kl - jb, i__4 = *m - j - jb + 1; + i2 = min(i__3,i__4); +/* Computing MIN */ + i__3 = jb, i__4 = *m - j - *kl + 1; + i3 = min(i__3,i__4); + +/* J2 and J3 are computed after JU has been updated. */ + +/* Factorize the current block of JB columns */ + + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + +/* Set fill-in elements in column JJ+KV to zero */ + + if (jj + kv <= *n) { + i__4 = *kl; + for (i__ = 1; i__ <= i__4; ++i__) { + ab[i__ + (jj + kv) * ab_dim1] = 0.; +/* L70: */ + } + } + +/* Find pivot and test for singularity. KM is the number of */ +/* subdiagonal elements in the current column. */ + +/* Computing MIN */ + i__4 = *kl, i__5 = *m - jj; + km = min(i__4,i__5); + i__4 = km + 1; + jp = idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1); + ipiv[jj] = jp + jj - j; + if (ab[kv + jp + jj * ab_dim1] != 0.) { +/* Computing MAX */ +/* Computing MIN */ + i__6 = jj + *ku + jp - 1; + i__4 = ju, i__5 = min(i__6,*n); + ju = max(i__4,i__5); + if (jp != 1) { + +/* Apply interchange to columns J to J+JB-1 */ + + if (jp + jj - 1 < j + *kl) { + + i__4 = *ldab - 1; + i__5 = *ldab - 1; + dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__4, &ab[kv + jp + jj - j + j * ab_dim1], + &i__5); + } else { + +/* The interchange affects columns J to JJ-1 of A31 */ +/* which are stored in the work array WORK31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], + &i__5, &work31[jp + jj - j - *kl - 1], & + c__65); + i__4 = j + jb - jj; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, & + ab[kv + jp + jj * ab_dim1], &i__6); + } + } + +/* Compute multipliers */ + + d__1 = 1. / ab[kv + 1 + jj * ab_dim1]; + dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1); + +/* Update trailing submatrix within the band and within */ +/* the current block. JM is the index of the last column */ +/* which needs to be updated. */ + +/* Computing MIN */ + i__4 = ju, i__5 = j + jb - 1; + jm = min(i__4,i__5); + if (jm > jj) { + i__4 = jm - jj; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], + &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, & + ab[kv + 1 + (jj + 1) * ab_dim1], &i__6); + } + } else { + +/* If pivot is zero, set INFO to the index of the pivot */ +/* unless a zero pivot has already been found. */ + + if (*info == 0) { + *info = jj; + } + } + +/* Copy current column of A31 into the work array WORK31 */ + +/* Computing MIN */ + i__4 = jj - j + 1; + nw = min(i__4,i3); + if (nw > 0) { + dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], & + c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); + } +/* L80: */ + } + if (j + jb <= *n) { + +/* Apply the row interchanges to the other blocks. */ + +/* Computing MIN */ + i__3 = ju - j + 1; + j2 = min(i__3,kv) - jb; +/* Computing MAX */ + i__3 = 0, i__4 = ju - j - kv + 1; + j3 = max(i__3,i__4); + +/* Use DLASWP to apply the row interchanges to A12, A22, and */ +/* A32. */ + + i__3 = *ldab - 1; + dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, & + c__1, &jb, &ipiv[j], &c__1); + +/* Adjust the pivot indices. */ + + i__3 = j + jb - 1; + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = ipiv[i__] + j - 1; +/* L90: */ + } + +/* Apply the row interchanges to A13, A23, and A33 */ +/* columnwise. */ + + k2 = j - 1 + jb + j2; + i__3 = j3; + for (i__ = 1; i__ <= i__3; ++i__) { + jj = k2 + i__; + i__4 = j + jb - 1; + for (ii = j + i__ - 1; ii <= i__4; ++ii) { + ip = ipiv[ii]; + if (ip != ii) { + temp = ab[kv + 1 + ii - jj + jj * ab_dim1]; + ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + + ip - jj + jj * ab_dim1]; + ab[kv + 1 + ip - jj + jj * ab_dim1] = temp; + } +/* L100: */ + } +/* L110: */ + } + +/* Update the relevant part of the trailing submatrix */ + + if (j2 > 0) { + +/* Update A12 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, + &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + + 1 - jb + (j + jb) * ab_dim1], &i__4); + + if (i2 > 0) { + +/* Update A22 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + i__5 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i2, &j2, &jb, + &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, + &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, + &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], & + i__5); + } + + if (i3 > 0) { + +/* Update A32 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i3, &j2, &jb, + &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j + + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl + + 1 - jb + (j + jb) * ab_dim1], &i__4); + } + } + + if (j3 > 0) { + +/* Copy the lower triangle of A13 into the work array */ +/* WORK13 */ + + i__3 = j3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = jb; + for (ii = jj; ii <= i__4; ++ii) { + work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj + + j + kv - 1) * ab_dim1]; +/* L120: */ + } +/* L130: */ + } + +/* Update A13 in the work array */ + + i__3 = *ldab - 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, + &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, + &c__65); + + if (i2 > 0) { + +/* Update A23 */ + + i__3 = *ldab - 1; + i__4 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i2, &j3, &jb, + &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, + work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv) + * ab_dim1], &i__4); + } + + if (i3 > 0) { + +/* Update A33 */ + + i__3 = *ldab - 1; + dgemm_("No transpose", "No transpose", &i3, &j3, &jb, + &c_b18, work31, &c__65, work13, &c__65, & + c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], & + i__3); + } + +/* Copy the lower triangle of A13 back into place */ + + i__3 = j3; + for (jj = 1; jj <= i__3; ++jj) { + i__4 = jb; + for (ii = jj; ii <= i__4; ++ii) { + ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = + work13[ii + jj * 65 - 66]; +/* L140: */ + } +/* L150: */ + } + } + } else { + +/* Adjust the pivot indices. */ + + i__3 = j + jb - 1; + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = ipiv[i__] + j - 1; +/* L160: */ + } + } + +/* Partially undo the interchanges in the current block to */ +/* restore the upper triangular form of A31 and copy the upper */ +/* triangle of A31 back into place */ + + i__3 = j; + for (jj = j + jb - 1; jj >= i__3; --jj) { + jp = ipiv[jj] - jj + 1; + if (jp != 1) { + +/* Apply interchange to columns J to JJ-1 */ + + if (jp + jj - 1 < j + *kl) { + +/* The interchange does not affect A31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + i__6 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__5, &ab[kv + jp + jj - j + j * ab_dim1], & + i__6); + } else { + +/* The interchange does affect A31 */ + + i__4 = jj - j; + i__5 = *ldab - 1; + dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & + i__5, &work31[jp + jj - j - *kl - 1], &c__65); + } + } + +/* Copy the current column of A31 back into place */ + +/* Computing MIN */ + i__4 = i3, i__5 = jj - j + 1; + nw = min(i__4,i__5); + if (nw > 0) { + dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[ + kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1); + } +/* L170: */ + } +/* L180: */ + } + } + + return 0; + +/* End of DGBTRF */ + +} /* dgbtrf_ */ |