diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake
index 04dd49cfbe..bf346a1b5b 100644
--- a/cmake/lapack.cmake
+++ b/cmake/lapack.cmake
@@ -70,8 +70,8 @@ set(SLASRC
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqp2rk.f slaqp3rk.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
- slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
- slarf1f.f slarf1l.f slarrv.f slartv.f
+ slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarft_lvl2.f slarfx.f
+ slarfy.f slargv.f slarf1f.f slarf1l.f slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f
@@ -177,7 +177,7 @@ set(CLASRC
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqz0.f claqz1.f claqz2.f claqz3.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
- clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
+ clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f clarft_lvl2.f
clarf1f.f clarf1l.f
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f90
@@ -262,8 +262,8 @@ set(DLASRC
dlaqgb.f dlaqge.f dlaqp2.f dlaqp2rk.f dlaqp3rk.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
- dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
- dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f
+ dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarft_lvl2.f
+ dlarfx.f dlarfy.f dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f
@@ -372,7 +372,7 @@ set(ZLASRC
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
- zlarfg.f zlarfgp.f zlarft.f zlarf1f.f zlarf1l.f
+ zlarfg.f zlarfgp.f zlarft.f zlarft_lvl2.f zlarf1f.f zlarf1l.f
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f90 zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
@@ -575,8 +575,8 @@ set(SLASRC
slaqgb.c slaqge.c slaqp2.c slaqp2rk.c slaqp3rk.c slaqps.c slaqsb.c slaqsp.c slaqsy.c
slaqr0.c slaqr1.c slaqr2.c slaqr3.c slaqr4.c slaqr5.c
slaqtr.c slar1v.c slar2v.c ilaslr.c ilaslc.c
- slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarfx.c slarfy.c slargv.c
- slarf1f.c slarf1l.c slarrv.c slartv.c
+ slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarft_lvl2.c slarfx.c
+ slarfy.c slargv.c slarf1f.c slarf1l.c slarrv.c slartv.c
slarz.c slarzb.c slarzt.c slasy2.c
slasyf.c slasyf_rook.c slasyf_rk.c slasyf_aa.c
slatbs.c slatdf.c slatps.c slatrd.c slatrs.c slatrz.c
@@ -682,7 +682,7 @@ set(CLASRC
claqr0.c claqr1.c claqr2.c claqr3.c claqr4.c claqr5.c
claqsp.c claqsy.c clar1v.c clar2v.c ilaclr.c ilaclc.c
clarf.c clarfb.c clarfb_gett.c clarfg.c clarfgp.c clarft.c
- clarf1f.c clarf1l.c
+ clarft_lvl2.c clarf1f.c clarf1l.c
clarfx.c clarfy.c clargv.c clarnv.c clarrv.c clartg.c clartv.c
clarz.c clarzb.c clarzt.c clascl.c claset.c clasr.c classq.c
clasyf.c clasyf_rook.c clasyf_rk.c clasyf_aa.c
@@ -765,8 +765,8 @@ set(DLASRC
dlaqgb.c dlaqge.c dlaqp2.c dlaqp2rk.c dlaqp3rk.c dlaqps.c dlaqsb.c dlaqsp.c dlaqsy.c
dlaqr0.c dlaqr1.c dlaqr2.c dlaqr3.c dlaqr4.c dlaqr5.c
dlaqtr.c dlar1v.c dlar2v.c iladlr.c iladlc.c
- dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarfx.c dlarfy.c
- dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c
+ dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarft_lvl2.c
+ dlarfx.c dlarfy.c dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c
dlarz.c dlarzb.c dlarzt.c dlasy2.c
dlasyf.c dlasyf_rook.c dlasyf_rk.c dlasyf_aa.c
dlatbs.c dlatdf.c dlatps.c dlatrd.c dlatrs.c dlatrz.c
@@ -874,7 +874,7 @@ set(ZLASRC
zlaqr0.c zlaqr1.c zlaqr2.c zlaqr3.c zlaqr4.c zlaqr5.c
zlaqsp.c zlaqsy.c zlar1v.c zlar2v.c ilazlr.c ilazlc.c
zlarcm.c zlarf.c zlarfb.c zlarfb_gett.c zlarf1f.c zlarf1l.c
- zlarfg.c zlarfgp.c zlarft.c
+ zlarfg.c zlarfgp.c zlarft.c zlarft_lvl2.c
zlarfx.c zlarfy.c zlargv.c zlarnv.c zlarrv.c zlartg.c zlartv.c
zlarz.c zlarzb.c zlarzt.c zlascl.c zlaset.c zlasr.c
zlassq.c zlasyf.c zlasyf_rook.c zlasyf_rk.c zlasyf_aa.c
diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile
index ebf3431a92..2200a4628d 100644
--- a/lapack-netlib/SRC/Makefile
+++ b/lapack-netlib/SRC/Makefile
@@ -154,7 +154,7 @@ SLASRC_O = \
slaqgb.o slaqge.o slaqp2.o slaqp2rk.o slaqp3rk.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
- slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \
+ slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarft_lvl2.o slarfx.o slarfy.o slargv.o \
slarf1f.o slarf1l.o slarrv.o slartv.o \
slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \
slasyf_rk.o \
@@ -270,7 +270,7 @@ CLASRC_O = \
claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
claqz0.o claqz1.o claqz2.o claqz3.o \
- clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \
+ clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarft_lvl2.o clarfgp.o \
clarf1f.o clarf1l.o \
clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
@@ -364,7 +364,7 @@ DLASRC_O = \
dlaqgb.o dlaqge.o dlaqp2.o dlaqp2rk.o dlaqp3rk.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
- dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \
+ dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarft_lvl2.o dlarfx.o dlarfy.o \
dlarf1f.o dlarf1l.o dlargv.o dlarrv.o dlartv.o \
dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \
dlasyf.o dlasyf_rook.o dlasyf_rk.o \
@@ -479,7 +479,7 @@ ZLASRC_O = \
zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
zlaqz0.o zlaqz1.o zlaqz2.o zlaqz3.o \
zlarcm.o zlarf.o zlarfb.o zlarfb_gett.o \
- zlarfg.o zlarft.o zlarfgp.o zlarf1f.o zlarf1l.o \
+ zlarfg.o zlarft.o zlarft_lvl2.o zlarfgp.o zlarf1f.o zlarf1l.o \
zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o zlasyf_aa.o \
diff --git a/lapack-netlib/SRC/clarft.c b/lapack-netlib/SRC/clarft.c
index f44cb810e7..e8e518c5ca 100644
--- a/lapack-netlib/SRC/clarft.c
+++ b/lapack-netlib/SRC/clarft.c
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -525,12 +524,13 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
*/
-
-
/* Table of constant values */
static complex c_b1 = {1.f,0.f};
-static integer c__1 = 1;
+static complex c_b3 = {-1.f,0.f};
+static integer c__3 = 3;
+static integer c_n1 = -1;
+static integer c__2 = 2;
/* > \brief \b CLARFT forms the triangular factor T of a block reflector H = I - vtvH */
@@ -539,7 +539,6 @@ static integer c__1 = 1;
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
-/* > \htmlonly */
/* > Download CLARFT + dependencies */
/* > */
@@ -550,12 +549,11 @@ f"> */
/* > */
/* > [TXT] */
-/* > \endhtmlonly */
/* Definition: */
/* =========== */
-/* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
/* CHARACTER DIRECT, STOREV */
/* INTEGER K, LDT, LDV, N */
@@ -663,9 +661,7 @@ f"> */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
-/* > \date December 2016 */
-
-/* > \ingroup complexOTHERauxiliary */
+/* > \ingroup larft */
/* > \par Further Details: */
/* ===================== */
@@ -694,255 +690,539 @@ f"> */
/* > \endverbatim */
/* > */
/* ===================================================================== */
-/* Subroutine */ void clarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer *
k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt)
{
/* System generated locals */
- integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
- complex q__1={0.,0.}, q__2={0.,0.}, q__3={0.,0.};
+ address a__1[2];
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2[2], i__3, i__4;
+ complex q__1;
+ char ch__1[2];
/* Local variables */
- integer i__, j;
- extern /* Subroutine */ void cgemm_(char *, char *, integer *, integer *,
+ integer i__, j, l;
+ logical lq, ql, qr;
+ integer nx;
+ extern /* Subroutine */ int clarft_lvl2__(char *, char *, integer *,
+ integer *, complex *, integer *, complex *, complex *, integer *);
+ logical dirf, colv;
+ extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
integer *, complex *, complex *, integer *, complex *, integer *,
- complex *, complex *, integer *), cgemv_(char *,
- integer *, integer *, complex *, complex *, integer *, complex *,
- integer *, complex *, complex *, integer *);
+ complex *, complex *, integer *);
extern logical lsame_(char *, char *);
- integer lastv;
- extern /* Subroutine */ void ctrmv_(char *, char *, char *, integer *,
- complex *, integer *, complex *, integer *);
- integer prevlastv;
- extern /* Subroutine */ void mecago_();
+ extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *,
+ integer *, integer *, complex *, complex *, integer *, complex *,
+ integer *), clacpy_(char *,
+ integer *, integer *, complex *, integer *, complex *, integer *);
+ extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+ integer *, integer *, ftnlen, ftnlen);
-/* -- LAPACK auxiliary routine (version 3.7.0) -- */
+/* -- LAPACK auxiliary routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/* December 2016 */
-/* ===================================================================== */
+
+
+
+
+
+
+
+
+
+
+/* The general scheme used is inspired by the approach inside DGEQRT3 */
+/* which was (at the time of writing this code): */
+/* Based on the algorithm of Elmroth and Gustavson, */
+/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
/* Quick return if possible */
/* Parameter adjustments */
v_dim1 = *ldv;
- v_offset = 1 + v_dim1 * 1;
+ v_offset = 1 + v_dim1;
v -= v_offset;
--tau;
t_dim1 = *ldt;
- t_offset = 1 + t_dim1 * 1;
+ t_offset = 1 + t_dim1;
t -= t_offset;
/* Function Body */
- if (*n == 0) {
- return;
+ if (*n == 0 || *k == 0) {
+ return 0;
}
- if (lsame_(direct, "F")) {
- prevlastv = *n;
- i__1 = *k;
- for (i__ = 1; i__ <= i__1; ++i__) {
- prevlastv = f2cmax(prevlastv,i__);
- i__2 = i__;
- if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) {
+/* Base case */
-/* H(i) = I */
+ if (*n == 1 || *k == 1) {
+ i__1 = t_dim1 + 1;
+ t[i__1].r = tau[1].r, t[i__1].i = tau[1].i;
+ return 0;
+ }
- i__2 = i__;
- for (j = 1; j <= i__2; ++j) {
- i__3 = j + i__ * t_dim1;
- t[i__3].r = 0.f, t[i__3].i = 0.f;
- }
- } else {
-
-/* general case */
-
- if (lsame_(storev, "C")) {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- i__3 = lastv + i__ * v_dim1;
- if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- i__3 = j + i__ * t_dim1;
- i__4 = i__;
- q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
- r_cnjg(&q__3, &v[i__ + j * v_dim1]);
- q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i =
- q__2.r * q__3.i + q__2.i * q__3.r;
- t[i__3].r = q__1.r, t[i__3].i = q__1.i;
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
-
- i__2 = j - i__;
- i__3 = i__ - 1;
- i__4 = i__;
- q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
- cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__
- + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
- c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
- } else {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- i__3 = i__ + lastv * v_dim1;
- if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- i__3 = j + i__ * t_dim1;
- i__4 = i__;
- q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
- i__5 = j + i__ * v_dim1;
- q__1.r = q__2.r * v[i__5].r - q__2.i * v[i__5].i,
- q__1.i = q__2.r * v[i__5].i + q__2.i * v[i__5]
- .r;
- t[i__3].r = q__1.r, t[i__3].i = q__1.i;
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
-
- i__2 = i__ - 1;
- i__3 = j - i__;
- i__4 = i__;
- q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
- cgemm_("N", "C", &i__2, &c__1, &i__3, &q__1, &v[(i__ + 1)
- * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
- ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
- }
+/* Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+ i__2[0] = 1, a__1[0] = direct;
+ i__2[1] = 1, a__1[1] = storev;
+ s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+ nx = ilaenv_(&c__3, "CLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+ ftnlen)2);
+ if (*k < nx) {
+ clarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+ return 0;
+ }
-/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
-
- i__2 = i__ - 1;
- ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
- t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
- i__2 = i__ + i__ * t_dim1;
- i__3 = i__;
- t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
- if (i__ > 1) {
- prevlastv = f2cmax(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
+/* Beginning of executable statements */
+
+ l = *k / 2;
+
+/* Determine what kind of Q we need to compute */
+/* We assume that if the user doesn't provide 'F' for DIRECT, */
+/* then they meant to provide 'B' and if they don't provide */
+/* 'C' for STOREV, then they meant to provide 'R' */
+
+ dirf = lsame_(direct, "F");
+ colv = lsame_(storev, "C");
+
+/* QR happens when we have forward direction in column storage */
+
+ qr = dirf && colv;
+
+/* LQ happens when we have forward direction in row storage */
+
+ lq = dirf && ! colv;
+
+/* QL happens when we have backward direction in column storage */
+
+ ql = ! dirf && colv;
+
+/* The last case is RQ. Due to how we structured this, if the */
+/* above 3 are false, then RQ must be true, so we never store */
+/* this */
+/* RQ happens when we have backward direction in row storage */
+/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+ if (qr) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} 0 | */
+/* |V_{2,1} V_{2,2}| */
+/* |V_{3,1} V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\C^{l,l} unit lower triangular */
+/* V_{2,1}\in\C^{k-l,l} rectangular */
+/* V_{3,1}\in\C^{n-k,l} rectangular */
+
+/* V_{2,2}\in\C^{k-l,k-l} unit lower triangular */
+/* V_{3,2}\in\C^{n-k,k-l} rectangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{l, l} upper triangular */
+/* T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\C^{l, k-l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/* Define T{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ clarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ clarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{2,1}' */
+
+ i__1 = l;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = *k - l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ i__4 = j + (l + i__) * t_dim1;
+ r_cnjg(&q__1, &v[l + i__ + j * v_dim1]);
+ t[i__4].r = q__1.r, t[i__4].i = q__1.i;
}
}
- } else {
- prevlastv = 1;
- for (i__ = *k; i__ >= 1; --i__) {
- i__1 = i__;
- if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) {
-/* H(i) = I */
+/* T_{1,2} = T_{1,2}*V_{2,2} */
- i__1 = *k;
- for (j = i__; j <= i__1; ++j) {
- i__2 = j + i__ * t_dim1;
- t[i__2].r = 0.f, t[i__2].i = 0.f;
- }
- } else {
-
-/* general case */
-
- if (i__ < *k) {
- if (lsame_(storev, "C")) {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- i__2 = lastv + i__ * v_dim1;
- if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- i__2 = j + i__ * t_dim1;
- i__3 = i__;
- q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
- r_cnjg(&q__3, &v[*n - *k + i__ + j * v_dim1]);
- q__1.r = q__2.r * q__3.r - q__2.i * q__3.i,
- q__1.i = q__2.r * q__3.i + q__2.i *
- q__3.r;
- t[i__2].r = q__1.r, t[i__2].i = q__1.i;
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
-
- i__1 = *n - *k + i__ - j;
- i__2 = *k - i__;
- i__3 = i__;
- q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
- cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[
- j + (i__ + 1) * v_dim1], ldv, &v[j + i__ *
- v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ *
- t_dim1], &c__1);
- } else {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- i__2 = i__ + lastv * v_dim1;
- if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- i__2 = j + i__ * t_dim1;
- i__3 = i__;
- q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
- i__4 = j + (*n - *k + i__) * v_dim1;
- q__1.r = q__2.r * v[i__4].r - q__2.i * v[i__4].i,
- q__1.i = q__2.r * v[i__4].i + q__2.i * v[
- i__4].r;
- t[i__2].r = q__1.r, t[i__2].i = q__1.i;
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
-
- i__1 = *k - i__;
- i__2 = *n - *k + i__ - j;
- i__3 = i__;
- q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
- cgemm_("N", "C", &i__1, &c__1, &i__2, &q__1, &v[i__ +
- 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
- ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
- }
-
-/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
- i__1 = *k - i__;
- ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
- + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
- t_dim1], &c__1)
- ;
- if (i__ > 1) {
- prevlastv = f2cmin(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
- }
- i__1 = i__ + i__ * t_dim1;
- i__2 = i__;
- t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+ i__1 = *k - l;
+ ctrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+ l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ cgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[*k +
+ 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b1, &
+ t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1'*V_2 */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__1 = *k - l;
+ ctrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1],
+ ldt);
+ } else if (lq) {
+
+/* Break V apart into 6 components */
+
+/* V = |----------------------| */
+/* |V_{1,1} V_{1,2} V{1,3}| */
+/* |0 V_{2,2} V{2,3}| */
+/* |----------------------| */
+
+/* V_{1,1}\in\C^{l,l} unit upper triangular */
+/* V_{1,2}\in\C^{l,k-l} rectangular */
+/* V_{1,3}\in\C^{l,n-k} rectangular */
+
+/* V_{2,2}\in\C^{k-l,k-l} unit upper triangular */
+/* V_{2,3}\in\C^{k-l,n-k} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{l, l} upper triangular */
+/* T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\C^{l, k-l} rectangular */
+
+/* Then, consider the product: */
+
+/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ clarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ clarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{1,2} */
+
+ i__1 = *k - l;
+ clacpy_("All", &l, &i__1, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) *
+ t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*V_{2,2}' */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Upper", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[l
+ + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ cgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[(*k +
+ 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+ c_b1, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1*V_2' */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__1 = *k - l;
+ ctrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1],
+ ldt);
+ } else if (ql) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} V_{1,2}| */
+/* |V_{2,1} V_{2,2}| */
+/* |0 V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\C^{n-k,k-l} rectangular */
+/* V_{2,1}\in\C^{k-l,k-l} unit upper triangular */
+
+/* V_{1,2}\in\C^{n-k,l} rectangular */
+/* V_{2,2}\in\C^{k-l,l} rectangular */
+/* V_{3,2}\in\C^{l,l} unit upper triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\C^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\C^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ clarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ clarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+ tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2}' */
+
+ i__1 = *k - l;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ i__4 = *k - l + i__ + j * t_dim1;
+ r_cnjg(&q__1, &v[*n - *k + j + (*k - l + i__) * v_dim1]);
+ t[i__4].r = q__1.r, t[i__4].i = q__1.i;
}
}
- }
- return;
-/* End of CLARFT */
+/* T_{2,1} = T_{2,1}*V_{2,1} */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+ *n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ cgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[(*k -
+ l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b1, &t[*k -
+ l + 1 + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2'*V_1 */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__1 = *k - l;
+ ctrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b3,
+ &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ } else {
+
+/* Else means RQ case */
+
+/* Break V apart into 6 components */
+
+/* V = |-----------------------| */
+/* |V_{1,1} V_{1,2} 0 | */
+/* |V_{2,1} V_{2,2} V_{2,3}| */
+/* |-----------------------| */
+
+/* V_{1,1}\in\C^{k-l,n-k} rectangular */
+/* V_{1,2}\in\C^{k-l,k-l} unit lower triangular */
+
+/* V_{2,1}\in\C^{l,n-k} rectangular */
+/* V_{2,2}\in\C^{l,k-l} rectangular */
+/* V_{2,3}\in\C^{l,l} unit lower triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\C^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\C^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ clarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ clarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k
+ - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2} */
+
+ i__1 = *k - l;
+ clacpy_("All", &l, &i__1, &v[*k - l + 1 + (*n - *k + 1) * v_dim1],
+ ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*V_{1,2}' */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Lower", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[(*
+ n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ cgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[*k -
+ l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - l + 1
+ + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2*V_1' */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__1 = *k - l;
+ ctrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b3, &
+ t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__1 = *k - l;
+ ctrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ }
+ return 0;
} /* clarft_ */
diff --git a/lapack-netlib/SRC/clarft.f b/lapack-netlib/SRC/clarft.f
index de8b97bf9c..3bf2448ba8 100644
--- a/lapack-netlib/SRC/clarft.f
+++ b/lapack-netlib/SRC/clarft.f
@@ -5,7 +5,6 @@
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
-*> \htmlonly
*> Download CLARFT + dependencies
*>
*> [TGZ]
@@ -13,7 +12,6 @@
*> [ZIP]
*>
*> [TXT]
-*> \endhtmlonly
*
* Definition:
* ===========
@@ -161,6 +159,7 @@
* =====================================================================
RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
$ TAU, T, LDT )
+ IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -179,11 +178,12 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
* .. Parameters ..
*
COMPLEX ONE, NEG_ONE, ZERO
- PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0)
+ PARAMETER(ONE=(1.0E+0,0.0E+0), ZERO = (0.0E+0,0.0E+0),
+ $ NEG_ONE=(-1.0E+0,0.0E+0))
*
* .. Local Scalars ..
*
- INTEGER I,J,L
+ INTEGER I,J,L,NX
LOGICAL QR,LQ,QL,DIRF,COLV
*
* .. External Subroutines ..
@@ -193,7 +193,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
* .. External Functions..
*
LOGICAL LSAME
- EXTERNAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
*
* .. Intrinsic Functions..
*
@@ -219,6 +220,14 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
RETURN
END IF
*
+* Determine when to cross over into the level 2 based implementation
+*
+ NX = ILAENV(3, "CLARFT", DIRECT // STOREV, N, K, -1, -1)
+ IF(K.LT.NX) THEN
+ CALL CLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+ RETURN
+ END IF
+*
* Beginning of executable statements
*
L = K / 2
diff --git a/lapack-netlib/SRC/clarft_lvl2.c b/lapack-netlib/SRC/clarft_lvl2.c
new file mode 100644
index 0000000000..2ca5490ee0
--- /dev/null
+++ b/lapack-netlib/SRC/clarft_lvl2.c
@@ -0,0 +1,935 @@
+#include
+#include
+#include
+#include
+#include
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{ flag cierr;
+ ftnint ciunit;
+ flag ciend;
+ char *cifmt;
+ ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{ flag icierr;
+ char *iciunit;
+ flag iciend;
+ char *icifmt;
+ ftnint icirlen;
+ ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{ flag oerr;
+ ftnint ounit;
+ char *ofnm;
+ ftnlen ofnmlen;
+ char *osta;
+ char *oacc;
+ char *ofm;
+ ftnint orl;
+ char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{ flag cerr;
+ ftnint cunit;
+ char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{ flag aerr;
+ ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{ flag inerr;
+ ftnint inunit;
+ char *infile;
+ ftnlen infilen;
+ ftnint *inex; /*parameters in standard's order*/
+ ftnint *inopen;
+ ftnint *innum;
+ ftnint *innamed;
+ char *inname;
+ ftnlen innamlen;
+ char *inacc;
+ ftnlen inacclen;
+ char *inseq;
+ ftnlen inseqlen;
+ char *indir;
+ ftnlen indirlen;
+ char *infmt;
+ ftnlen infmtlen;
+ char *inform;
+ ftnint informlen;
+ char *inunf;
+ ftnlen inunflen;
+ ftnint *inrecl;
+ ftnint *innrec;
+ char *inblank;
+ ftnlen inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype { /* for multiple entry points */
+ integer1 g;
+ shortint h;
+ integer i;
+ /* longint j; */
+ real r;
+ doublereal d;
+ complex c;
+ doublecomplex z;
+ };
+
+typedef union Multitype Multitype;
+
+struct Vardesc { /* for Namelist */
+ char *name;
+ char *addr;
+ ftnlen *dims;
+ int type;
+ };
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+ char *name;
+ Vardesc **vars;
+ int nvars;
+ };
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b) ((a) >> (b) & 1)
+#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+ float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static double dpow_ui(double x, integer n) {
+ double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+ complex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+ for(u = n; ; ) {
+ if(u & 01) pow.r *= x.r, pow.i *= x.i;
+ if(u >>= 1) x.r *= x.r, x.i *= x.i;
+ else break;
+ }
+ }
+ _Fcomplex p={pow.r, pow.i};
+ return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+ _Complex float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+ _Dcomplex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+ for(u = n; ; ) {
+ if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+ if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+ else break;
+ }
+ }
+ _Dcomplex p = {pow._Val[0], pow._Val[1]};
+ return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+ _Complex double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+ integer pow; unsigned long int u;
+ if (n <= 0) {
+ if (n == 0 || x == 1) pow = 1;
+ else if (x != -1) pow = x == 0 ? 1/x : 0;
+ else n = -n;
+ }
+ if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+ u = n;
+ for(pow = 1; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+ double m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+ float m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+ integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+ _Fcomplex zdotc = {0.0, 0.0};
+ if (incx == 1 && incy == 1) {
+ for (i=0;i \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT */
+
+/* =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/* http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download CLARFT_LVL2 + dependencies */
+/* > */
+/* > [TGZ] */
+/* > */
+/* > [ZIP] */
+/* > */
+/* > [TXT] */
+
+/* Definition: */
+/* =========== */
+
+/* SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/* T, LDT ) */
+
+/* CHARACTER DIRECT, STOREV */
+/* INTEGER K, LDT, LDV, N */
+/* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/* ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > CLARFT_LVL2 forms the triangular factor T of a complex block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* > H = I - V * T * V**H */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* > H = I - V**H * T * V */
+/* > \endverbatim */
+
+/* Arguments: */
+/* ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* > DIRECT is CHARACTER*1 */
+/* > Specifies the order in which the elementary reflectors are */
+/* > multiplied to form the block reflector: */
+/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* > STOREV is CHARACTER*1 */
+/* > Specifies how the vectors which define the elementary */
+/* > reflectors are stored (see also Further Details): */
+/* > = 'C': columnwise */
+/* > = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* > N is INTEGER */
+/* > The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* > K is INTEGER */
+/* > The order of the triangular factor T (= the number of */
+/* > elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* > V is COMPLEX array, dimension */
+/* > (LDV,K) if STOREV = 'C' */
+/* > (LDV,N) if STOREV = 'R' */
+/* > The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* > LDV is INTEGER */
+/* > The leading dimension of the array V. */
+/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* > TAU is COMPLEX array, dimension (K) */
+/* > TAU(i) must contain the scalar factor of the elementary */
+/* > reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* > T is COMPLEX array, dimension (LDT,K) */
+/* > The k by k triangular factor T of the block reflector. */
+/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* > lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* > LDT is INTEGER */
+/* > The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/* Authors: */
+/* ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/* ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The shape of the matrix V and the storage of the vectors which define */
+/* > the H(i) is best illustrated by the following example with n = 5 and */
+/* > k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */
+/* > ( v1 1 ) ( 1 v2 v2 v2 ) */
+/* > ( v1 v2 1 ) ( 1 v3 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > */
+/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */
+/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */
+/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */
+/* > ( 1 v3 ) */
+/* > ( 1 ) */
+/* > \endverbatim */
+/* > */
+/* ===================================================================== */
+/* Subroutine */ int clarft_lvl2__(char *direct, char *storev, integer *n,
+ integer *k, complex *v, integer *ldv, complex *tau, complex *t,
+ integer *ldt)
+{
+ /* System generated locals */
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
+ complex q__1, q__2, q__3;
+
+ /* Local variables */
+ integer i__, j, prevlastv;
+ extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+ integer *, complex *, complex *, integer *, complex *, integer *,
+ complex *, complex *, integer *), cgemv_(char *,
+ integer *, integer *, complex *, complex *, integer *, complex *,
+ integer *, complex *, complex *, integer *);
+ extern logical lsame_(char *, char *);
+ integer lastv;
+ extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *,
+ complex *, integer *, complex *, integer *), mecago_();
+
+
+/* -- LAPACK auxiliary routine -- */
+/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
+/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/* ===================================================================== */
+
+
+/* Quick return if possible */
+
+ /* Parameter adjustments */
+ v_dim1 = *ldv;
+ v_offset = 1 + v_dim1;
+ v -= v_offset;
+ --tau;
+ t_dim1 = *ldt;
+ t_offset = 1 + t_dim1;
+ t -= t_offset;
+
+ /* Function Body */
+ if (*n == 0) {
+ return 0;
+ }
+
+ if (lsame_(direct, "F")) {
+ prevlastv = *n;
+ i__1 = *k;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ prevlastv = f2cmax(prevlastv,i__);
+ i__2 = i__;
+ if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) {
+
+/* H(i) = I */
+
+ i__2 = i__;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = j + i__ * t_dim1;
+ t[i__3].r = 0.f, t[i__3].i = 0.f;
+ }
+ } else {
+
+/* general case */
+
+ if (lsame_(storev, "C")) {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ i__3 = lastv + i__ * v_dim1;
+ if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = j + i__ * t_dim1;
+ i__4 = i__;
+ q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
+ r_cnjg(&q__3, &v[i__ + j * v_dim1]);
+ q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i =
+ q__2.r * q__3.i + q__2.i * q__3.r;
+ t[i__3].r = q__1.r, t[i__3].i = q__1.i;
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
+
+ i__2 = j - i__;
+ i__3 = i__ - 1;
+ i__4 = i__;
+ q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
+ cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__
+ + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
+ c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
+ } else {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ i__3 = i__ + lastv * v_dim1;
+ if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = j + i__ * t_dim1;
+ i__4 = i__;
+ q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
+ i__5 = j + i__ * v_dim1;
+ q__1.r = q__2.r * v[i__5].r - q__2.i * v[i__5].i,
+ q__1.i = q__2.r * v[i__5].i + q__2.i * v[i__5]
+ .r;
+ t[i__3].r = q__1.r, t[i__3].i = q__1.i;
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
+
+ i__2 = i__ - 1;
+ i__3 = j - i__;
+ i__4 = i__;
+ q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
+ cgemm_("N", "C", &i__2, &c__1, &i__3, &q__1, &v[(i__ + 1)
+ * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
+ ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
+ }
+
+/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+ i__2 = i__ - 1;
+ ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+ i__2 = i__ + i__ * t_dim1;
+ i__3 = i__;
+ t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+ if (i__ > 1) {
+ prevlastv = f2cmax(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ }
+ } else {
+ prevlastv = 1;
+ for (i__ = *k; i__ >= 1; --i__) {
+ i__1 = i__;
+ if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) {
+
+/* H(i) = I */
+
+ i__1 = *k;
+ for (j = i__; j <= i__1; ++j) {
+ i__2 = j + i__ * t_dim1;
+ t[i__2].r = 0.f, t[i__2].i = 0.f;
+ }
+ } else {
+
+/* general case */
+
+ if (i__ < *k) {
+ if (lsame_(storev, "C")) {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ i__2 = lastv + i__ * v_dim1;
+ if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ i__2 = j + i__ * t_dim1;
+ i__3 = i__;
+ q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
+ r_cnjg(&q__3, &v[*n - *k + i__ + j * v_dim1]);
+ q__1.r = q__2.r * q__3.r - q__2.i * q__3.i,
+ q__1.i = q__2.r * q__3.i + q__2.i *
+ q__3.r;
+ t[i__2].r = q__1.r, t[i__2].i = q__1.i;
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
+
+ i__1 = *n - *k + i__ - j;
+ i__2 = *k - i__;
+ i__3 = i__;
+ q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+ cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[
+ j + (i__ + 1) * v_dim1], ldv, &v[j + i__ *
+ v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ *
+ t_dim1], &c__1);
+ } else {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ i__2 = i__ + lastv * v_dim1;
+ if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ i__2 = j + i__ * t_dim1;
+ i__3 = i__;
+ q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
+ i__4 = j + (*n - *k + i__) * v_dim1;
+ q__1.r = q__2.r * v[i__4].r - q__2.i * v[i__4].i,
+ q__1.i = q__2.r * v[i__4].i + q__2.i * v[
+ i__4].r;
+ t[i__2].r = q__1.r, t[i__2].i = q__1.i;
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
+
+ i__1 = *k - i__;
+ i__2 = *n - *k + i__ - j;
+ i__3 = i__;
+ q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+ cgemm_("N", "C", &i__1, &c__1, &i__2, &q__1, &v[i__ +
+ 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
+ ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
+ }
+
+/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+ i__1 = *k - i__;
+ ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+ t_dim1], &c__1)
+ ;
+ if (i__ > 1) {
+ prevlastv = f2cmin(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ i__1 = i__ + i__ * t_dim1;
+ i__2 = i__;
+ t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+ }
+ }
+ }
+ return 0;
+
+/* End of CLARFT_LVL2 */
+
+} /* clarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/clarft_lvl2.f b/lapack-netlib/SRC/clarft_lvl2.f
new file mode 100644
index 0000000000..3b0aea1132
--- /dev/null
+++ b/lapack-netlib/SRC/clarft_lvl2.f
@@ -0,0 +1,328 @@
+*> \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+*> Download CLARFT_LVL2 + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
+*> [TXT]
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+* T, LDT )
+*
+* .. Scalar Arguments ..
+* CHARACTER DIRECT, STOREV
+* INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> CLARFT_LVL2 forms the triangular factor T of a complex block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*> H = I - V * T * V**H
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*> H = I - V**H * T * V
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*> DIRECT is CHARACTER*1
+*> Specifies the order in which the elementary reflectors are
+*> multiplied to form the block reflector:
+*> = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*> = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*> STOREV is CHARACTER*1
+*> Specifies how the vectors which define the elementary
+*> reflectors are stored (see also Further Details):
+*> = 'C': columnwise
+*> = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> The order of the triangular factor T (= the number of
+*> elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*> V is COMPLEX array, dimension
+*> (LDV,K) if STOREV = 'C'
+*> (LDV,N) if STOREV = 'R'
+*> The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*> LDV is INTEGER
+*> The leading dimension of the array V.
+*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*> TAU is COMPLEX array, dimension (K)
+*> TAU(i) must contain the scalar factor of the elementary
+*> reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*> T is COMPLEX array, dimension (LDT,K)
+*> The k by k triangular factor T of the block reflector.
+*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*> lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*> LDT is INTEGER
+*> The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> The shape of the matrix V and the storage of the vectors which define
+*> the H(i) is best illustrated by the following example with n = 5 and
+*> k = 3. The elements equal to 1 are not stored.
+*>
+*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
+*>
+*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
+*> ( v1 1 ) ( 1 v2 v2 v2 )
+*> ( v1 v2 1 ) ( 1 v3 v3 )
+*> ( v1 v2 v3 )
+*> ( v1 v2 v3 )
+*>
+*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
+*>
+*> V = ( v1 v2 v3 ) V = ( v1 v1 1 )
+*> ( v1 v2 v3 ) ( v2 v2 v2 1 )
+*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
+*> ( 1 v3 )
+*> ( 1 )
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+ $ T, LDT )
+*
+* -- LAPACK auxiliary routine --
+* -- LAPACK is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+* .. Scalar Arguments ..
+ CHARACTER DIRECT, STOREV
+ INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+ COMPLEX T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX ONE, ZERO
+ PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ),
+ $ ZERO = ( 0.0E+0, 0.0E+0 ) )
+* ..
+* .. Local Scalars ..
+ INTEGER I, J, PREVLASTV, LASTV
+* ..
+* .. External Subroutines ..
+ EXTERNAL CGEMM, CGEMV, CTRMV
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. Executable Statements ..
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+ IF( LSAME( DIRECT, 'F' ) ) THEN
+ PREVLASTV = N
+ DO I = 1, K
+ PREVLASTV = MAX( PREVLASTV, I )
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = 1, I
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
+*
+ CALL CGEMV( 'Conjugate transpose', J-I, I-1,
+ $ -TAU( I ), V( I+1, 1 ), LDV,
+ $ V( I+1, I ), 1,
+ $ ONE, T( 1, I ), 1 )
+ ELSE
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * V( J , I )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
+*
+ CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
+ $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+ $ ONE, T( 1, I ), LDT )
+ END IF
+*
+* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+ CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+ $ T,
+ $ LDT, T( 1, I ), 1 )
+ T( I, I ) = TAU( I )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MAX( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ END DO
+ ELSE
+ PREVLASTV = 1
+ DO I = K, 1, -1
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = I, K
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( I.LT.K ) THEN
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
+*
+ CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
+ $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
+ $ 1, ONE, T( I+1, I ), 1 )
+ ELSE
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * V( J, N-K+I )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
+*
+ CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J,
+ $ -TAU( I ),
+ $ V( I+1, J ), LDV, V( I, J ), LDV,
+ $ ONE, T( I+1, I ), LDT )
+ END IF
+*
+* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+ CALL CTRMV( 'Lower', 'No transpose', 'Non-unit',
+ $ K-I,
+ $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MIN( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ T( I, I ) = TAU( I )
+ END IF
+ END DO
+ END IF
+ RETURN
+*
+* End of CLARFT_LVL2
+*
+ END
diff --git a/lapack-netlib/SRC/dlarft.c b/lapack-netlib/SRC/dlarft.c
index 65ce8807fc..9a968c286f 100644
--- a/lapack-netlib/SRC/dlarft.c
+++ b/lapack-netlib/SRC/dlarft.c
@@ -190,8 +190,8 @@ typedef struct Namelist Namelist;
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
-#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
-#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -378,19 +377,24 @@ static integer smaxloc_(float *w, integer s, integer e, integer *n)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
+
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH */
@@ -522,7 +539,6 @@ static doublereal c_b6 = 1.;
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
-/* > \htmlonly */
/* > Download DLARFT + dependencies */
/* > */
@@ -533,12 +549,11 @@ f"> */
/* > */
/* > [TXT] */
-/* > \endhtmlonly */
/* Definition: */
/* =========== */
-/* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
/* CHARACTER DIRECT, STOREV */
/* INTEGER K, LDT, LDV, N */
@@ -646,9 +661,7 @@ f"> */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
-/* > \date December 2016 */
-
-/* > \ingroup doubleOTHERauxiliary */
+/* > \ingroup larft */
/* > \par Further Details: */
/* ===================== */
@@ -677,212 +690,536 @@ f"> */
/* > \endverbatim */
/* > */
/* ===================================================================== */
-/* Subroutine */ void dlarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer *
k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t,
integer *ldt)
{
/* System generated locals */
- integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
- doublereal d__1;
+ address a__1[2];
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1[2], i__2, i__3;
+ char ch__1[2];
/* Local variables */
- integer i__, j;
+ integer i__, j, l;
+ logical lq, ql, qr;
+ integer nx;
+ extern /* Subroutine */ int dlarft_lvl2__(char *, char *, integer *,
+ integer *, doublereal *, integer *, doublereal *, doublereal *,
+ integer *);
+ logical dirf, colv;
+ extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+ integer *, doublereal *, doublereal *, integer *, doublereal *,
+ integer *, doublereal *, doublereal *, integer *);
extern logical lsame_(char *, char *);
- extern /* Subroutine */ void dgemv_(char *, integer *, integer *,
- doublereal *, doublereal *, integer *, doublereal *, integer *,
- doublereal *, doublereal *, integer *);
- integer lastv;
- extern /* Subroutine */ void dtrmv_(char *, char *, char *, integer *,
- doublereal *, integer *, doublereal *, integer *);
- integer prevlastv;
+ extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *,
+ integer *, integer *, doublereal *, doublereal *, integer *,
+ doublereal *, integer *), dlacpy_(
+ char *, integer *, integer *, doublereal *, integer *, doublereal
+ *, integer *);
+ extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+ integer *, integer *, ftnlen, ftnlen);
-/* -- LAPACK auxiliary routine (version 3.7.0) -- */
+/* -- LAPACK auxiliary routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/* December 2016 */
-/* ===================================================================== */
+
+
+
+
+
+
+
+
+/* The general scheme used is inspired by the approach inside DGEQRT3 */
+/* which was (at the time of writing this code): */
+/* Based on the algorithm of Elmroth and Gustavson, */
+/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
/* Quick return if possible */
/* Parameter adjustments */
v_dim1 = *ldv;
- v_offset = 1 + v_dim1 * 1;
+ v_offset = 1 + v_dim1;
v -= v_offset;
--tau;
t_dim1 = *ldt;
- t_offset = 1 + t_dim1 * 1;
+ t_offset = 1 + t_dim1;
t -= t_offset;
/* Function Body */
- if (*n == 0) {
- return;
+ if (*n == 0 || *k == 0) {
+ return 0;
}
- if (lsame_(direct, "F")) {
- prevlastv = *n;
- i__1 = *k;
- for (i__ = 1; i__ <= i__1; ++i__) {
- prevlastv = f2cmax(i__,prevlastv);
- if (tau[i__] == 0.) {
+/* Base case */
-/* H(i) = I */
+ if (*n == 1 || *k == 1) {
+ t[t_dim1 + 1] = tau[1];
+ return 0;
+ }
- i__2 = i__;
- for (j = 1; j <= i__2; ++j) {
- t[j + i__ * t_dim1] = 0.;
- }
- } else {
-
-/* general case */
-
- if (lsame_(storev, "C")) {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- if (v[lastv + i__ * v_dim1] != 0.) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
-
- i__2 = j - i__;
- i__3 = i__ - 1;
- d__1 = -tau[i__];
- dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 +
- v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
- c_b6, &t[i__ * t_dim1 + 1], &c__1);
- } else {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- if (v[i__ + lastv * v_dim1] != 0.) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
-
- i__2 = i__ - 1;
- i__3 = j - i__;
- d__1 = -tau[i__];
- dgemv_("No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) *
- v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
- ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
- }
+/* Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+ i__1[0] = 1, a__1[0] = direct;
+ i__1[1] = 1, a__1[1] = storev;
+ s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+ nx = ilaenv_(&c__3, "DLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+ ftnlen)2);
+ if (*k < nx) {
+ dlarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+ return 0;
+ }
-/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+/* Beginning of executable statements */
- i__2 = i__ - 1;
- dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
- t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
- t[i__ + i__ * t_dim1] = tau[i__];
- if (i__ > 1) {
- prevlastv = f2cmax(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
+ l = *k / 2;
+
+/* Determine what kind of Q we need to compute */
+/* We assume that if the user doesn't provide 'F' for DIRECT, */
+/* then they meant to provide 'B' and if they don't provide */
+/* 'C' for STOREV, then they meant to provide 'R' */
+
+ dirf = lsame_(direct, "F");
+ colv = lsame_(storev, "C");
+
+/* QR happens when we have forward direction in column storage */
+
+ qr = dirf && colv;
+
+/* LQ happens when we have forward direction in row storage */
+
+ lq = dirf && ! colv;
+
+/* QL happens when we have backward direction in column storage */
+
+ ql = ! dirf && colv;
+
+/* The last case is RQ. Due to how we structured this, if the */
+/* above 3 are false, then RQ must be true, so we never store */
+/* this */
+/* RQ happens when we have backward direction in row storage */
+/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+ if (qr) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} 0 | */
+/* |V_{2,1} V_{2,2}| */
+/* |V_{3,1} V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\R^{l,l} unit lower triangular */
+/* V_{2,1}\in\R^{k-l,l} rectangular */
+/* V_{3,1}\in\R^{n-k,l} rectangular */
+
+/* V_{2,2}\in\R^{k-l,k-l} unit lower triangular */
+/* V_{3,2}\in\R^{n-k,k-l} rectangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{l, l} upper triangular */
+/* T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\R^{l, k-l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ dlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ dlarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{2,1}' */
+
+ i__2 = l;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = *k - l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ t[j + (l + i__) * t_dim1] = v[l + i__ + j * v_dim1];
}
}
- } else {
- prevlastv = 1;
- for (i__ = *k; i__ >= 1; --i__) {
- if (tau[i__] == 0.) {
-/* H(i) = I */
+/* T_{1,2} = T_{1,2}*V_{2,2} */
- i__1 = *k;
- for (j = i__; j <= i__1; ++j) {
- t[j + i__ * t_dim1] = 0.;
- }
- } else {
-
-/* general case */
-
- if (i__ < *k) {
- if (lsame_(storev, "C")) {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- if (v[lastv + i__ * v_dim1] != 0.) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__
- + j * v_dim1];
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
-
- i__1 = *n - *k + i__ - j;
- i__2 = *k - i__;
- d__1 = -tau[i__];
- dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__
- + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
- c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
- c__1);
- } else {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- if (v[i__ + lastv * v_dim1] != 0.) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k
- + i__) * v_dim1];
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
-
- i__1 = *k - i__;
- i__2 = *n - *k + i__ - j;
- d__1 = -tau[i__];
- dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ +
- 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
- ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
- }
-
-/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
- i__1 = *k - i__;
- dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
- + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
- t_dim1], &c__1)
- ;
- if (i__ > 1) {
- prevlastv = f2cmin(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
- }
- t[i__ + i__ * t_dim1] = tau[i__];
+ i__2 = *k - l;
+ dtrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__2, &c_b13, &
+ v[l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1],
+ ldt);
+
+/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ dgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[*k +
+ 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b13,
+ &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1'*V_2 */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__2 = *k - l;
+ dtrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+ c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1
+ + 1], ldt);
+ } else if (lq) {
+
+/* Break V apart into 6 components */
+
+/* V = |----------------------| */
+/* |V_{1,1} V_{1,2} V{1,3}| */
+/* |0 V_{2,2} V{2,3}| */
+/* |----------------------| */
+
+/* V_{1,1}\in\R^{l,l} unit upper triangular */
+/* V_{1,2}\in\R^{l,k-l} rectangular */
+/* V_{1,3}\in\R^{l,n-k} rectangular */
+
+/* V_{2,2}\in\R^{k-l,k-l} unit upper triangular */
+/* V_{2,3}\in\R^{k-l,n-k} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{l, l} upper triangular */
+/* T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\R^{l, k-l} rectangular */
+
+/* Then, consider the product: */
+
+/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ dlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ dlarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{1,2} */
+
+ i__2 = *k - l;
+ dlacpy_("All", &l, &i__2, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) *
+ t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*V_{2,2}' */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Upper", "Transpose", "Unit", &l, &i__2, &c_b13, &v[l
+ + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ dgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[(*k
+ + 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+ c_b13, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1*V_2' */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__2 = *k - l;
+ dtrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+ c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1
+ + 1], ldt);
+ } else if (ql) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} V_{1,2}| */
+/* |V_{2,1} V_{2,2}| */
+/* |0 V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\R^{n-k,k-l} rectangular */
+/* V_{2,1}\in\R^{k-l,k-l} unit upper triangular */
+
+/* V_{1,2}\in\R^{n-k,l} rectangular */
+/* V_{2,2}\in\R^{k-l,l} rectangular */
+/* V_{3,2}\in\R^{l,l} unit upper triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\R^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\R^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ dlarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ dlarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+ tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2}' */
+
+ i__2 = *k - l;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ t[*k - l + i__ + j * t_dim1] = v[*n - *k + j + (*k - l + i__)
+ * v_dim1];
}
}
- }
- return;
-/* End of DLARFT */
+/* T_{2,1} = T_{2,1}*V_{2,1} */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__2, &c_b13, &
+ v[*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ dgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[(*k
+ - l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b13, &t[*k
+ - l + 1 + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2'*V_1 */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__2 = *k - l;
+ dtrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__2, &
+ c_b13, &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ } else {
+
+/* Else means RQ case */
+
+/* Break V apart into 6 components */
+
+/* V = |-----------------------| */
+/* |V_{1,1} V_{1,2} 0 | */
+/* |V_{2,1} V_{2,2} V_{2,3}| */
+/* |-----------------------| */
+
+/* V_{1,1}\in\R^{k-l,n-k} rectangular */
+/* V_{1,2}\in\R^{k-l,k-l} unit lower triangular */
+
+/* V_{2,1}\in\R^{l,n-k} rectangular */
+/* V_{2,2}\in\R^{l,k-l} rectangular */
+/* V_{2,3}\in\R^{l,l} unit lower triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\R^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\R^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ dlarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ dlarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k
+ - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2} */
+
+ i__2 = *k - l;
+ dlacpy_("All", &l, &i__2, &v[*k - l + 1 + (*n - *k + 1) * v_dim1],
+ ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*V_{1,2}' */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Lower", "Transpose", "Unit", &l, &i__2, &c_b13, &v[(*
+ n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ dgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[*k -
+ l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b13, &t[*k - l +
+ 1 + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2*V_1' */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__2 = *k - l;
+ dtrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__2 = *k - l;
+ dtrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b13,
+ &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ }
+ return 0;
} /* dlarft_ */
diff --git a/lapack-netlib/SRC/dlarft.f b/lapack-netlib/SRC/dlarft.f
index c27bb1a806..fa2ea0d134 100644
--- a/lapack-netlib/SRC/dlarft.f
+++ b/lapack-netlib/SRC/dlarft.f
@@ -5,7 +5,6 @@
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
-*> \htmlonly
*> Download DLARFT + dependencies
*>
*> [TGZ]
@@ -13,7 +12,6 @@
*> [ZIP]
*>
*> [TXT]
-*> \endhtmlonly
*
* Definition:
* ===========
@@ -161,6 +159,7 @@
* =====================================================================
RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
$ TAU, T, LDT )
+ IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -183,7 +182,7 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
*
* .. Local Scalars ..
*
- INTEGER I,J,L
+ INTEGER I,J,L,NX
LOGICAL QR,LQ,QL,DIRF,COLV
*
* .. External Subroutines ..
@@ -193,7 +192,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
* .. External Functions..
*
LOGICAL LSAME
- EXTERNAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
*
* The general scheme used is inspired by the approach inside DGEQRT3
* which was (at the time of writing this code):
@@ -215,6 +215,14 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
RETURN
END IF
*
+* Determine when to cross over into the level 2 based implementation
+*
+ NX = ILAENV(3, "DLARFT", DIRECT // STOREV, N, K, -1, -1)
+ IF(K.LT.NX) THEN
+ CALL DLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+ RETURN
+ END IF
+*
* Beginning of executable statements
*
L = K / 2
diff --git a/lapack-netlib/SRC/dlarft_lvl2.c b/lapack-netlib/SRC/dlarft_lvl2.c
new file mode 100644
index 0000000000..6bd748c436
--- /dev/null
+++ b/lapack-netlib/SRC/dlarft_lvl2.c
@@ -0,0 +1,895 @@
+#include
+#include
+#include
+#include
+#include
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{ flag cierr;
+ ftnint ciunit;
+ flag ciend;
+ char *cifmt;
+ ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{ flag icierr;
+ char *iciunit;
+ flag iciend;
+ char *icifmt;
+ ftnint icirlen;
+ ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{ flag oerr;
+ ftnint ounit;
+ char *ofnm;
+ ftnlen ofnmlen;
+ char *osta;
+ char *oacc;
+ char *ofm;
+ ftnint orl;
+ char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{ flag cerr;
+ ftnint cunit;
+ char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{ flag aerr;
+ ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{ flag inerr;
+ ftnint inunit;
+ char *infile;
+ ftnlen infilen;
+ ftnint *inex; /*parameters in standard's order*/
+ ftnint *inopen;
+ ftnint *innum;
+ ftnint *innamed;
+ char *inname;
+ ftnlen innamlen;
+ char *inacc;
+ ftnlen inacclen;
+ char *inseq;
+ ftnlen inseqlen;
+ char *indir;
+ ftnlen indirlen;
+ char *infmt;
+ ftnlen infmtlen;
+ char *inform;
+ ftnint informlen;
+ char *inunf;
+ ftnlen inunflen;
+ ftnint *inrecl;
+ ftnint *innrec;
+ char *inblank;
+ ftnlen inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype { /* for multiple entry points */
+ integer1 g;
+ shortint h;
+ integer i;
+ /* longint j; */
+ real r;
+ doublereal d;
+ complex c;
+ doublecomplex z;
+ };
+
+typedef union Multitype Multitype;
+
+struct Vardesc { /* for Namelist */
+ char *name;
+ char *addr;
+ ftnlen *dims;
+ int type;
+ };
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+ char *name;
+ Vardesc **vars;
+ int nvars;
+ };
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b) ((a) >> (b) & 1)
+#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+ float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static double dpow_ui(double x, integer n) {
+ double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+ complex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+ for(u = n; ; ) {
+ if(u & 01) pow.r *= x.r, pow.i *= x.i;
+ if(u >>= 1) x.r *= x.r, x.i *= x.i;
+ else break;
+ }
+ }
+ _Fcomplex p={pow.r, pow.i};
+ return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+ _Complex float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+ _Dcomplex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+ for(u = n; ; ) {
+ if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+ if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+ else break;
+ }
+ }
+ _Dcomplex p = {pow._Val[0], pow._Val[1]};
+ return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+ _Complex double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+
+static integer pow_ii(integer x, integer n) {
+ integer pow; unsigned long int u;
+ if (n <= 0) {
+ if (n == 0 || x == 1) pow = 1;
+ else if (x != -1) pow = x == 0 ? 1/x : 0;
+ else n = -n;
+ }
+ if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+ u = n;
+ for(pow = 1; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+ double m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+ float m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+ integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+ _Fcomplex zdotc = {0.0, 0.0};
+ if (incx == 1 && incy == 1) {
+ for (i=0;i \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT. */
+
+/* =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/* http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download DLARFT_LVL2 + dependencies */
+/* > */
+/* > [TGZ] */
+/* > */
+/* > [ZIP] */
+/* > */
+/* > [TXT] */
+
+/* Definition: */
+/* =========== */
+
+/* SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/* T, LDT ) */
+
+/* CHARACTER DIRECT, STOREV */
+/* INTEGER K, LDT, LDV, N */
+/* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/* ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DLARFT_LVL2 forms the triangular factor T of a real block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* > H = I - V * T * V**T */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* > H = I - V**T * T * V */
+/* > \endverbatim */
+
+/* Arguments: */
+/* ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* > DIRECT is CHARACTER*1 */
+/* > Specifies the order in which the elementary reflectors are */
+/* > multiplied to form the block reflector: */
+/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* > STOREV is CHARACTER*1 */
+/* > Specifies how the vectors which define the elementary */
+/* > reflectors are stored (see also Further Details): */
+/* > = 'C': columnwise */
+/* > = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* > N is INTEGER */
+/* > The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* > K is INTEGER */
+/* > The order of the triangular factor T (= the number of */
+/* > elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* > V is DOUBLE PRECISION array, dimension */
+/* > (LDV,K) if STOREV = 'C' */
+/* > (LDV,N) if STOREV = 'R' */
+/* > The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* > LDV is INTEGER */
+/* > The leading dimension of the array V. */
+/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* > TAU is DOUBLE PRECISION array, dimension (K) */
+/* > TAU(i) must contain the scalar factor of the elementary */
+/* > reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* > T is DOUBLE PRECISION array, dimension (LDT,K) */
+/* > The k by k triangular factor T of the block reflector. */
+/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* > lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* > LDT is INTEGER */
+/* > The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/* Authors: */
+/* ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/* ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The shape of the matrix V and the storage of the vectors which define */
+/* > the H(i) is best illustrated by the following example with n = 5 and */
+/* > k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */
+/* > ( v1 1 ) ( 1 v2 v2 v2 ) */
+/* > ( v1 v2 1 ) ( 1 v3 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > */
+/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */
+/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */
+/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */
+/* > ( 1 v3 ) */
+/* > ( 1 ) */
+/* > \endverbatim */
+/* > */
+/* ===================================================================== */
+/* Subroutine */ int dlarft_lvl2__(char *direct, char *storev, integer *n,
+ integer *k, doublereal *v, integer *ldv, doublereal *tau, doublereal *
+ t, integer *ldt)
+{
+ /* System generated locals */
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
+ doublereal d__1;
+
+ /* Local variables */
+ integer i__, j, prevlastv;
+ extern logical lsame_(char *, char *);
+ extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+ doublereal *, doublereal *, integer *, doublereal *, integer *,
+ doublereal *, doublereal *, integer *);
+ integer lastv;
+ extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *,
+ doublereal *, integer *, doublereal *, integer *), mecago_();
+
+
+/* -- LAPACK auxiliary routine -- */
+/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
+/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/* ===================================================================== */
+
+
+/* Quick return if possible */
+
+ /* Parameter adjustments */
+ v_dim1 = *ldv;
+ v_offset = 1 + v_dim1;
+ v -= v_offset;
+ --tau;
+ t_dim1 = *ldt;
+ t_offset = 1 + t_dim1;
+ t -= t_offset;
+
+ /* Function Body */
+ if (*n == 0) {
+ return 0;
+ }
+
+ if (lsame_(direct, "F")) {
+ prevlastv = *n;
+ i__1 = *k;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ prevlastv = f2cmax(i__,prevlastv);
+ if (tau[i__] == 0.) {
+
+/* H(i) = I */
+
+ i__2 = i__;
+ for (j = 1; j <= i__2; ++j) {
+ t[j + i__ * t_dim1] = 0.;
+ }
+ } else {
+
+/* general case */
+
+ if (lsame_(storev, "C")) {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ if (v[lastv + i__ * v_dim1] != 0.) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
+
+ i__2 = j - i__;
+ i__3 = i__ - 1;
+ d__1 = -tau[i__];
+ dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 +
+ v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
+ c_b6, &t[i__ * t_dim1 + 1], &c__1);
+ } else {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ if (v[i__ + lastv * v_dim1] != 0.) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
+
+ i__2 = i__ - 1;
+ i__3 = j - i__;
+ d__1 = -tau[i__];
+ dgemv_("No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) *
+ v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
+ ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
+ }
+
+/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+ i__2 = i__ - 1;
+ dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+ t[i__ + i__ * t_dim1] = tau[i__];
+ if (i__ > 1) {
+ prevlastv = f2cmax(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ }
+ } else {
+ prevlastv = 1;
+ for (i__ = *k; i__ >= 1; --i__) {
+ if (tau[i__] == 0.) {
+
+/* H(i) = I */
+
+ i__1 = *k;
+ for (j = i__; j <= i__1; ++j) {
+ t[j + i__ * t_dim1] = 0.;
+ }
+ } else {
+
+/* general case */
+
+ if (i__ < *k) {
+ if (lsame_(storev, "C")) {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ if (v[lastv + i__ * v_dim1] != 0.) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__
+ + j * v_dim1];
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
+
+ i__1 = *n - *k + i__ - j;
+ i__2 = *k - i__;
+ d__1 = -tau[i__];
+ dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__
+ + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
+ c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
+ c__1);
+ } else {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ if (v[i__ + lastv * v_dim1] != 0.) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k
+ + i__) * v_dim1];
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
+
+ i__1 = *k - i__;
+ i__2 = *n - *k + i__ - j;
+ d__1 = -tau[i__];
+ dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ +
+ 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
+ ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
+ }
+
+/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+ i__1 = *k - i__;
+ dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+ t_dim1], &c__1)
+ ;
+ if (i__ > 1) {
+ prevlastv = f2cmin(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ t[i__ + i__ * t_dim1] = tau[i__];
+ }
+ }
+ }
+ return 0;
+
+/* End of DLARFT_LVL2 */
+
+} /* dlarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/dlarft_lvl2.f b/lapack-netlib/SRC/dlarft_lvl2.f
new file mode 100644
index 0000000000..9614df466d
--- /dev/null
+++ b/lapack-netlib/SRC/dlarft_lvl2.f
@@ -0,0 +1,326 @@
+*> \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT.
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+*> Download DLARFT_LVL2 + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
+*> [TXT]
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+* T, LDT )
+*
+* .. Scalar Arguments ..
+* CHARACTER DIRECT, STOREV
+* INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> DLARFT_LVL2 forms the triangular factor T of a real block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*> H = I - V * T * V**T
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*> H = I - V**T * T * V
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*> DIRECT is CHARACTER*1
+*> Specifies the order in which the elementary reflectors are
+*> multiplied to form the block reflector:
+*> = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*> = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*> STOREV is CHARACTER*1
+*> Specifies how the vectors which define the elementary
+*> reflectors are stored (see also Further Details):
+*> = 'C': columnwise
+*> = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> The order of the triangular factor T (= the number of
+*> elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*> V is DOUBLE PRECISION array, dimension
+*> (LDV,K) if STOREV = 'C'
+*> (LDV,N) if STOREV = 'R'
+*> The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*> LDV is INTEGER
+*> The leading dimension of the array V.
+*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*> TAU is DOUBLE PRECISION array, dimension (K)
+*> TAU(i) must contain the scalar factor of the elementary
+*> reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*> T is DOUBLE PRECISION array, dimension (LDT,K)
+*> The k by k triangular factor T of the block reflector.
+*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*> lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*> LDT is INTEGER
+*> The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> The shape of the matrix V and the storage of the vectors which define
+*> the H(i) is best illustrated by the following example with n = 5 and
+*> k = 3. The elements equal to 1 are not stored.
+*>
+*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
+*>
+*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
+*> ( v1 1 ) ( 1 v2 v2 v2 )
+*> ( v1 v2 1 ) ( 1 v3 v3 )
+*> ( v1 v2 v3 )
+*> ( v1 v2 v3 )
+*>
+*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
+*>
+*> V = ( v1 v2 v3 ) V = ( v1 v1 1 )
+*> ( v1 v2 v3 ) ( v2 v2 v2 1 )
+*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
+*> ( 1 v3 )
+*> ( 1 )
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+ $ T, LDT )
+*
+* -- LAPACK auxiliary routine --
+* -- LAPACK is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+* .. Scalar Arguments ..
+ CHARACTER DIRECT, STOREV
+ INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+ DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE, ZERO
+ PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, J, PREVLASTV, LASTV
+* ..
+* .. External Subroutines ..
+ EXTERNAL DGEMV, DTRMV
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. Executable Statements ..
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+ IF( LSAME( DIRECT, 'F' ) ) THEN
+ PREVLASTV = N
+ DO I = 1, K
+ PREVLASTV = MAX( I, PREVLASTV )
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = 1, I
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * V( I , J )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
+*
+ CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ),
+ $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
+ $ T( 1, I ), 1 )
+ ELSE
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * V( J , I )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
+*
+ CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ),
+ $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE,
+ $ T( 1, I ), 1 )
+ END IF
+*
+* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+ CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+ $ T,
+ $ LDT, T( 1, I ), 1 )
+ T( I, I ) = TAU( I )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MAX( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ END DO
+ ELSE
+ PREVLASTV = 1
+ DO I = K, 1, -1
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = I, K
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( I.LT.K ) THEN
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * V( N-K+I , J )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
+*
+ CALL DGEMV( 'Transpose', N-K+I-J, K-I,
+ $ -TAU( I ),
+ $ V( J, I+1 ), LDV, V( J, I ), 1, ONE,
+ $ T( I+1, I ), 1 )
+ ELSE
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * V( J, N-K+I )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
+*
+ CALL DGEMV( 'No transpose', K-I, N-K+I-J,
+ $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
+ $ ONE, T( I+1, I ), 1 )
+ END IF
+*
+* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+ CALL DTRMV( 'Lower', 'No transpose', 'Non-unit',
+ $ K-I,
+ $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MIN( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ T( I, I ) = TAU( I )
+ END IF
+ END DO
+ END IF
+ RETURN
+*
+* End of DLARFT_LVL2
+*
+ END
diff --git a/lapack-netlib/SRC/ilaenv.f b/lapack-netlib/SRC/ilaenv.f
index e74a2b35ec..e108108d7f 100644
--- a/lapack-netlib/SRC/ilaenv.f
+++ b/lapack-netlib/SRC/ilaenv.f
@@ -5,7 +5,6 @@
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
-*> \htmlonly
*> Download ILAENV + dependencies
*>
*> [TGZ]
@@ -13,7 +12,6 @@
*> [ZIP]
*>
*> [TXT]
-*> \endhtmlonly
*
* Definition:
* ===========
@@ -159,6 +157,7 @@
*>
* =====================================================================
INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
+ IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -667,6 +666,10 @@ INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
IF( C3.EQ.'HD3' ) THEN
NX = 128
END IF
+ ELSE IF( C2.EQ.'LA' ) THEN
+ IF( C3.EQ.'RFT' ) THEN
+ NX = 64
+ END IF
END IF
ILAENV = NX
RETURN
diff --git a/lapack-netlib/SRC/slarft.c b/lapack-netlib/SRC/slarft.c
index 6f8350b218..34445172b1 100644
--- a/lapack-netlib/SRC/slarft.c
+++ b/lapack-netlib/SRC/slarft.c
@@ -190,8 +190,8 @@ typedef struct Namelist Namelist;
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
-#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
-#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -378,19 +377,24 @@ static integer smaxloc_(float *w, integer s, integer e, integer *n)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
+
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i \brief \b SLARFT forms the triangular factor T of a block reflector H = I - vtvH */
@@ -522,7 +539,6 @@ static real c_b6 = 1.f;
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
-/* > \htmlonly */
/* > Download SLARFT + dependencies */
/* > */
@@ -533,12 +549,11 @@ f"> */
/* > */
/* > [TXT] */
-/* > \endhtmlonly */
/* Definition: */
/* =========== */
-/* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
/* CHARACTER DIRECT, STOREV */
/* INTEGER K, LDT, LDV, N */
@@ -643,12 +658,10 @@ f"> */
/* > \author Univ. of Tennessee */
/* > \author Univ. of California Berkeley */
-/* > \author Univ. of Colorado Denver */
+/* > \author Johnathan Rhyne, Univ. of Colorado Denver (original author, 2024) */
/* > \author NAG Ltd. */
-/* > \date December 2016 */
-
-/* > \ingroup realOTHERauxiliary */
+/* > \ingroup larft */
/* > \par Further Details: */
/* ===================== */
@@ -677,211 +690,533 @@ f"> */
/* > \endverbatim */
/* > */
/* ===================================================================== */
-/* Subroutine */ void slarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer *
k, real *v, integer *ldv, real *tau, real *t, integer *ldt)
{
/* System generated locals */
- integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
- real r__1;
+ address a__1[2];
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1[2], i__2, i__3;
+ char ch__1[2];
/* Local variables */
- integer i__, j;
+ integer i__, j, l;
+ logical lq, ql, qr;
+ integer nx;
+ extern /* Subroutine */ int slarft_lvl2__(char *, char *, integer *,
+ integer *, real *, integer *, real *, real *, integer *);
+ logical dirf, colv;
extern logical lsame_(char *, char *);
- extern /* Subroutine */ void sgemv_(char *, integer *, integer *, real *,
- real *, integer *, real *, integer *, real *, real *, integer *);
- integer lastv;
- extern /* Subroutine */ void strmv_(char *, char *, char *, integer *,
- real *, integer *, real *, integer *);
- integer prevlastv;
- extern /* Subroutine */ void mecago_();
-
-
-/* -- LAPACK auxiliary routine (version 3.7.0) -- */
+ extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+ integer *, real *, real *, integer *, real *, integer *, real *,
+ real *, integer *), strmm_(char *, char *, char *,
+ char *, integer *, integer *, real *, real *, integer *, real *,
+ integer *);
+ extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+ integer *, integer *, ftnlen, ftnlen);
+ extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
+ integer *, real *, integer *);
+
+
+/* -- LAPACK auxiliary routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/* December 2016 */
-/* ===================================================================== */
+
+
+
+
+
+
+
+
+/* The general scheme used is inspired by the approach inside DGEQRT3 */
+/* which was (at the time of writing this code): */
+/* Based on the algorithm of Elmroth and Gustavson, */
+/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
/* Quick return if possible */
/* Parameter adjustments */
v_dim1 = *ldv;
- v_offset = 1 + v_dim1 * 1;
+ v_offset = 1 + v_dim1;
v -= v_offset;
--tau;
t_dim1 = *ldt;
- t_offset = 1 + t_dim1 * 1;
+ t_offset = 1 + t_dim1;
t -= t_offset;
/* Function Body */
- if (*n == 0) {
- return;
+ if (*n == 0 || *k == 0) {
+ return 0;
}
- if (lsame_(direct, "F")) {
- prevlastv = *n;
- i__1 = *k;
- for (i__ = 1; i__ <= i__1; ++i__) {
- prevlastv = f2cmax(i__,prevlastv);
- if (tau[i__] == 0.f) {
+/* Base case */
-/* H(i) = I */
+ if (*n == 1 || *k == 1) {
+ t[t_dim1 + 1] = tau[1];
+ return 0;
+ }
- i__2 = i__;
- for (j = 1; j <= i__2; ++j) {
- t[j + i__ * t_dim1] = 0.f;
- }
- } else {
-
-/* general case */
-
- if (lsame_(storev, "C")) {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- if (v[lastv + i__ * v_dim1] != 0.f) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
-
- i__2 = j - i__;
- i__3 = i__ - 1;
- r__1 = -tau[i__];
- sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + 1 +
- v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
- c_b6, &t[i__ * t_dim1 + 1], &c__1);
- } else {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- if (v[i__ + lastv * v_dim1] != 0.f) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
-
- i__2 = i__ - 1;
- i__3 = j - i__;
- r__1 = -tau[i__];
- sgemv_("No transpose", &i__2, &i__3, &r__1, &v[(i__ + 1) *
- v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
- ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
- }
+/* Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+ i__1[0] = 1, a__1[0] = direct;
+ i__1[1] = 1, a__1[1] = storev;
+ s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+ nx = ilaenv_(&c__3, "SLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+ ftnlen)2);
+ if (*k < nx) {
+ slarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+ return 0;
+ }
-/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+/* Beginning of executable statements */
- i__2 = i__ - 1;
- strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
- t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
- t[i__ + i__ * t_dim1] = tau[i__];
- if (i__ > 1) {
- prevlastv = f2cmax(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
+ l = *k / 2;
+
+/* Determine what kind of Q we need to compute */
+/* We assume that if the user doesn't provide 'F' for DIRECT, */
+/* then they meant to provide 'B' and if they don't provide */
+/* 'C' for STOREV, then they meant to provide 'R' */
+
+ dirf = lsame_(direct, "F");
+ colv = lsame_(storev, "C");
+
+/* QR happens when we have forward direction in column storage */
+
+ qr = dirf && colv;
+
+/* LQ happens when we have forward direction in row storage */
+
+ lq = dirf && ! colv;
+
+/* QL happens when we have backward direction in column storage */
+
+ ql = ! dirf && colv;
+
+/* The last case is RQ. Due to how we structured this, if the */
+/* above 3 are false, then RQ must be true, so we never store */
+/* this */
+/* RQ happens when we have backward direction in row storage */
+/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+ if (qr) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} 0 | */
+/* |V_{2,1} V_{2,2}| */
+/* |V_{3,1} V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\R^{l,l} unit lower triangular */
+/* V_{2,1}\in\R^{k-l,l} rectangular */
+/* V_{3,1}\in\R^{n-k,l} rectangular */
+
+/* V_{2,2}\in\R^{k-l,k-l} unit lower triangular */
+/* V_{3,2}\in\R^{n-k,k-l} rectangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{l, l} upper triangular */
+/* T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\R^{l, k-l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ slarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ slarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{2,1}' */
+
+ i__2 = l;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = *k - l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ t[j + (l + i__) * t_dim1] = v[l + i__ + j * v_dim1];
}
}
- } else {
- prevlastv = 1;
- for (i__ = *k; i__ >= 1; --i__) {
- if (tau[i__] == 0.f) {
-/* H(i) = I */
+/* T_{1,2} = T_{1,2}*V_{2,2} */
- i__1 = *k;
- for (j = i__; j <= i__1; ++j) {
- t[j + i__ * t_dim1] = 0.f;
- }
- } else {
-
-/* general case */
-
- if (i__ < *k) {
- if (lsame_(storev, "C")) {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- if (v[lastv + i__ * v_dim1] != 0.f) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__
- + j * v_dim1];
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
-
- i__1 = *n - *k + i__ - j;
- i__2 = *k - i__;
- r__1 = -tau[i__];
- sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__
- + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
- c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
- c__1);
- } else {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- if (v[i__ + lastv * v_dim1] != 0.f) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k
- + i__) * v_dim1];
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
-
- i__1 = *k - i__;
- i__2 = *n - *k + i__ - j;
- r__1 = -tau[i__];
- sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ +
- 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
- ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
- }
-
-/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
- i__1 = *k - i__;
- strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
- + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
- t_dim1], &c__1)
- ;
- if (i__ > 1) {
- prevlastv = f2cmin(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
- }
- t[i__ + i__ * t_dim1] = tau[i__];
+ i__2 = *k - l;
+ strmm_("Right", "Lower", "No transpose", "Unit", &l, &i__2, &c_b13, &
+ v[l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1],
+ ldt);
+
+/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ sgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[*k +
+ 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b13,
+ &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1'*V_2 */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__2 = *k - l;
+ strmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__2 = *k - l;
+ strmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+ c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1
+ + 1], ldt);
+ } else if (lq) {
+
+/* Break V apart into 6 components */
+
+/* V = |----------------------| */
+/* |V_{1,1} V_{1,2} V{1,3}| */
+/* |0 V_{2,2} V{2,3}| */
+/* |----------------------| */
+
+/* V_{1,1}\in\R^{l,l} unit upper triangular */
+/* V_{1,2}\in\R^{l,k-l} rectangular */
+/* V_{1,3}\in\R^{l,n-k} rectangular */
+
+/* V_{2,2}\in\R^{k-l,k-l} unit upper triangular */
+/* V_{2,3}\in\R^{k-l,n-k} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{l, l} upper triangular */
+/* T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\R^{l, k-l} rectangular */
+
+/* Then, consider the product: */
+
+/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ slarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ slarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{1,2} */
+
+ i__2 = *k - l;
+ slacpy_("All", &l, &i__2, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) *
+ t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*V_{2,2}' */
+
+ i__2 = *k - l;
+ strmm_("Right", "Upper", "Transpose", "Unit", &l, &i__2, &c_b13, &v[l
+ + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ sgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[(*k
+ + 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+ c_b13, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1*V_2' */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__2 = *k - l;
+ strmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__2 = *k - l;
+ strmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+ c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1
+ + 1], ldt);
+ } else if (ql) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} V_{1,2}| */
+/* |V_{2,1} V_{2,2}| */
+/* |0 V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\R^{n-k,k-l} rectangular */
+/* V_{2,1}\in\R^{k-l,k-l} unit upper triangular */
+
+/* V_{1,2}\in\R^{n-k,l} rectangular */
+/* V_{2,2}\in\R^{k-l,l} rectangular */
+/* V_{3,2}\in\R^{l,l} unit upper triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\R^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\R^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ slarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ slarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+ tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2}' */
+
+ i__2 = *k - l;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ t[*k - l + i__ + j * t_dim1] = v[*n - *k + j + (*k - l + i__)
+ * v_dim1];
}
}
- }
- return;
-/* End of SLARFT */
+/* T_{2,1} = T_{2,1}*V_{2,1} */
+
+ i__2 = *k - l;
+ strmm_("Right", "Upper", "No transpose", "Unit", &l, &i__2, &c_b13, &
+ v[*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ sgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[(*k
+ - l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b13, &t[*k
+ - l + 1 + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2'*V_1 */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__2 = *k - l;
+ strmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__2 = *k - l;
+ strmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__2, &
+ c_b13, &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ } else {
+
+/* Else means RQ case */
+
+/* Break V apart into 6 components */
+
+/* V = |-----------------------| */
+/* |V_{1,1} V_{1,2} 0 | */
+/* |V_{2,1} V_{2,2} V_{2,3}| */
+/* |-----------------------| */
+
+/* V_{1,1}\in\R^{k-l,n-k} rectangular */
+/* V_{1,2}\in\R^{k-l,k-l} unit lower triangular */
+
+/* V_{2,1}\in\R^{l,n-k} rectangular */
+/* V_{2,2}\in\R^{l,k-l} rectangular */
+/* V_{2,3}\in\R^{l,l} unit lower triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\R^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\R^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'TV */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__2 = *n - l;
+ i__3 = *k - l;
+ slarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ slarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k
+ - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2} */
+
+ i__2 = *k - l;
+ slacpy_("All", &l, &i__2, &v[*k - l + 1 + (*n - *k + 1) * v_dim1],
+ ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*V_{1,2}' */
+
+ i__2 = *k - l;
+ strmm_("Right", "Lower", "Transpose", "Unit", &l, &i__2, &c_b13, &v[(*
+ n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__2 = *k - l;
+ i__3 = *n - *k;
+ sgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[*k -
+ l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b13, &t[*k - l +
+ 1 + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2*V_1' */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__2 = *k - l;
+ strmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b22,
+ &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__2 = *k - l;
+ strmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b13,
+ &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ }
+ return 0;
} /* slarft_ */
diff --git a/lapack-netlib/SRC/slarft.f b/lapack-netlib/SRC/slarft.f
index ad3a4d924c..3e0eac751e 100644
--- a/lapack-netlib/SRC/slarft.f
+++ b/lapack-netlib/SRC/slarft.f
@@ -5,7 +5,6 @@
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
-*> \htmlonly
*> Download SLARFT + dependencies
*>
*> [TGZ]
@@ -13,7 +12,6 @@
*> [ZIP]
*>
*> [TXT]
-*> \endhtmlonly
*
* Definition:
* ===========
@@ -161,6 +159,7 @@
* =====================================================================
RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
$ TAU, T, LDT )
+ IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -183,7 +182,7 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
*
* .. Local Scalars ..
*
- INTEGER I,J,L
+ INTEGER I,J,L,NX
LOGICAL QR,LQ,QL,DIRF,COLV
*
* .. External Subroutines ..
@@ -193,7 +192,8 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
* .. External Functions..
*
LOGICAL LSAME
- EXTERNAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
*
* The general scheme used is inspired by the approach inside DGEQRT3
* which was (at the time of writing this code):
@@ -215,6 +215,14 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
RETURN
END IF
*
+* Determine when to cross over into the level 2 based implementation
+*
+ NX = ILAENV(3, "SLARFT", DIRECT // STOREV, N, K, -1, -1)
+ IF(K.LT.NX) THEN
+ CALL SLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+ RETURN
+ END IF
+*
* Beginning of executable statements
*
L = K / 2
diff --git a/lapack-netlib/SRC/slarft_lvl2.c b/lapack-netlib/SRC/slarft_lvl2.c
new file mode 100644
index 0000000000..d1b8f31b65
--- /dev/null
+++ b/lapack-netlib/SRC/slarft_lvl2.c
@@ -0,0 +1,896 @@
+#include
+#include
+#include
+#include
+#include
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{ flag cierr;
+ ftnint ciunit;
+ flag ciend;
+ char *cifmt;
+ ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{ flag icierr;
+ char *iciunit;
+ flag iciend;
+ char *icifmt;
+ ftnint icirlen;
+ ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{ flag oerr;
+ ftnint ounit;
+ char *ofnm;
+ ftnlen ofnmlen;
+ char *osta;
+ char *oacc;
+ char *ofm;
+ ftnint orl;
+ char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{ flag cerr;
+ ftnint cunit;
+ char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{ flag aerr;
+ ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{ flag inerr;
+ ftnint inunit;
+ char *infile;
+ ftnlen infilen;
+ ftnint *inex; /*parameters in standard's order*/
+ ftnint *inopen;
+ ftnint *innum;
+ ftnint *innamed;
+ char *inname;
+ ftnlen innamlen;
+ char *inacc;
+ ftnlen inacclen;
+ char *inseq;
+ ftnlen inseqlen;
+ char *indir;
+ ftnlen indirlen;
+ char *infmt;
+ ftnlen infmtlen;
+ char *inform;
+ ftnint informlen;
+ char *inunf;
+ ftnlen inunflen;
+ ftnint *inrecl;
+ ftnint *innrec;
+ char *inblank;
+ ftnlen inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype { /* for multiple entry points */
+ integer1 g;
+ shortint h;
+ integer i;
+ /* longint j; */
+ real r;
+ doublereal d;
+ complex c;
+ doublecomplex z;
+ };
+
+typedef union Multitype Multitype;
+
+struct Vardesc { /* for Namelist */
+ char *name;
+ char *addr;
+ ftnlen *dims;
+ int type;
+ };
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+ char *name;
+ Vardesc **vars;
+ int nvars;
+ };
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b) ((a) >> (b) & 1)
+#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+ float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static double dpow_ui(double x, integer n) {
+ double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+ complex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+ for(u = n; ; ) {
+ if(u & 01) pow.r *= x.r, pow.i *= x.i;
+ if(u >>= 1) x.r *= x.r, x.i *= x.i;
+ else break;
+ }
+ }
+ _Fcomplex p={pow.r, pow.i};
+ return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+ _Complex float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+ _Dcomplex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+ for(u = n; ; ) {
+ if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+ if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+ else break;
+ }
+ }
+ _Dcomplex p = {pow._Val[0], pow._Val[1]};
+ return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+ _Complex double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+ integer pow; unsigned long int u;
+ if (n <= 0) {
+ if (n == 0 || x == 1) pow = 1;
+ else if (x != -1) pow = x == 0 ? 1/x : 0;
+ else n = -n;
+ }
+ if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+ u = n;
+ for(pow = 1; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+ double m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+ float m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+ integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+ _Fcomplex zdotc = {0.0, 0.0};
+ if (incx == 1 && incy == 1) {
+ for (i=0;i \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT. */
+
+/* =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/* http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download SLARFT_LVL2 + dependencies */
+/* > */
+/* > [TGZ] */
+/* > */
+/* > [ZIP] */
+/* > */
+/* > [TXT] */
+
+/* Definition: */
+/* =========== */
+
+/* SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/* T, LDT ) */
+
+/* CHARACTER DIRECT, STOREV */
+/* INTEGER K, LDT, LDV, N */
+/* REAL T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/* ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLARFT_LVL2 forms the triangular factor T of a real block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* > H = I - V * T * V**T */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* > H = I - V**T * T * V */
+/* > \endverbatim */
+
+/* Arguments: */
+/* ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* > DIRECT is CHARACTER*1 */
+/* > Specifies the order in which the elementary reflectors are */
+/* > multiplied to form the block reflector: */
+/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* > STOREV is CHARACTER*1 */
+/* > Specifies how the vectors which define the elementary */
+/* > reflectors are stored (see also Further Details): */
+/* > = 'C': columnwise */
+/* > = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* > N is INTEGER */
+/* > The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* > K is INTEGER */
+/* > The order of the triangular factor T (= the number of */
+/* > elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* > V is REAL array, dimension */
+/* > (LDV,K) if STOREV = 'C' */
+/* > (LDV,N) if STOREV = 'R' */
+/* > The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* > LDV is INTEGER */
+/* > The leading dimension of the array V. */
+/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* > TAU is REAL array, dimension (K) */
+/* > TAU(i) must contain the scalar factor of the elementary */
+/* > reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* > T is REAL array, dimension (LDT,K) */
+/* > The k by k triangular factor T of the block reflector. */
+/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* > lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* > LDT is INTEGER */
+/* > The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/* Authors: */
+/* ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/* ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The shape of the matrix V and the storage of the vectors which define */
+/* > the H(i) is best illustrated by the following example with n = 5 and */
+/* > k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */
+/* > ( v1 1 ) ( 1 v2 v2 v2 ) */
+/* > ( v1 v2 1 ) ( 1 v3 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > */
+/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */
+/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */
+/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */
+/* > ( 1 v3 ) */
+/* > ( 1 ) */
+/* > \endverbatim */
+/* > */
+/* ===================================================================== */
+/* Subroutine */ int slarft_lvl2__(char *direct, char *storev, integer *n,
+ integer *k, real *v, integer *ldv, real *tau, real *t, integer *ldt)
+{
+ /* System generated locals */
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
+ real r__1;
+
+ /* Local variables */
+ integer i__, j, prevlastv;
+ extern logical lsame_(char *, char *);
+ extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+ real *, integer *, real *, integer *, real *, real *, integer *);
+ integer lastv;
+ extern /* Subroutine */ int strmv_(char *, char *, char *, integer *,
+ real *, integer *, real *, integer *),
+ mecago_();
+
+
+/* -- LAPACK auxiliary routine -- */
+/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
+/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/* ===================================================================== */
+
+
+/* Quick return if possible */
+
+ /* Parameter adjustments */
+ v_dim1 = *ldv;
+ v_offset = 1 + v_dim1;
+ v -= v_offset;
+ --tau;
+ t_dim1 = *ldt;
+ t_offset = 1 + t_dim1;
+ t -= t_offset;
+
+ /* Function Body */
+ if (*n == 0) {
+ return 0;
+ }
+
+ if (lsame_(direct, "F")) {
+ prevlastv = *n;
+ i__1 = *k;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ prevlastv = f2cmax(i__,prevlastv);
+ if (tau[i__] == 0.f) {
+
+/* H(i) = I */
+
+ i__2 = i__;
+ for (j = 1; j <= i__2; ++j) {
+ t[j + i__ * t_dim1] = 0.f;
+ }
+ } else {
+
+/* general case */
+
+ if (lsame_(storev, "C")) {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ if (v[lastv + i__ * v_dim1] != 0.f) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
+
+ i__2 = j - i__;
+ i__3 = i__ - 1;
+ r__1 = -tau[i__];
+ sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + 1 +
+ v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
+ c_b6, &t[i__ * t_dim1 + 1], &c__1);
+ } else {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ if (v[i__ + lastv * v_dim1] != 0.f) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
+
+ i__2 = i__ - 1;
+ i__3 = j - i__;
+ r__1 = -tau[i__];
+ sgemv_("No transpose", &i__2, &i__3, &r__1, &v[(i__ + 1) *
+ v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
+ ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
+ }
+
+/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+ i__2 = i__ - 1;
+ strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+ t[i__ + i__ * t_dim1] = tau[i__];
+ if (i__ > 1) {
+ prevlastv = f2cmax(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ }
+ } else {
+ prevlastv = 1;
+ for (i__ = *k; i__ >= 1; --i__) {
+ if (tau[i__] == 0.f) {
+
+/* H(i) = I */
+
+ i__1 = *k;
+ for (j = i__; j <= i__1; ++j) {
+ t[j + i__ * t_dim1] = 0.f;
+ }
+ } else {
+
+/* general case */
+
+ if (i__ < *k) {
+ if (lsame_(storev, "C")) {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ if (v[lastv + i__ * v_dim1] != 0.f) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__
+ + j * v_dim1];
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
+
+ i__1 = *n - *k + i__ - j;
+ i__2 = *k - i__;
+ r__1 = -tau[i__];
+ sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__
+ + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
+ c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
+ c__1);
+ } else {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ if (v[i__ + lastv * v_dim1] != 0.f) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k
+ + i__) * v_dim1];
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
+
+ i__1 = *k - i__;
+ i__2 = *n - *k + i__ - j;
+ r__1 = -tau[i__];
+ sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ +
+ 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
+ ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
+ }
+
+/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+ i__1 = *k - i__;
+ strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+ t_dim1], &c__1)
+ ;
+ if (i__ > 1) {
+ prevlastv = f2cmin(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ t[i__ + i__ * t_dim1] = tau[i__];
+ }
+ }
+ }
+ return 0;
+
+/* End of SLARFT_LVL2 */
+
+} /* slarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/slarft_lvl2.f b/lapack-netlib/SRC/slarft_lvl2.f
new file mode 100644
index 0000000000..7107a91d5d
--- /dev/null
+++ b/lapack-netlib/SRC/slarft_lvl2.f
@@ -0,0 +1,326 @@
+*> \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT.
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+*> Download SLARFT_LVL2 + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
+*> [TXT]
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+* T, LDT )
+*
+* .. Scalar Arguments ..
+* CHARACTER DIRECT, STOREV
+* INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+* REAL T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> SLARFT_LVL2 forms the triangular factor T of a real block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*> H = I - V * T * V**T
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*> H = I - V**T * T * V
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*> DIRECT is CHARACTER*1
+*> Specifies the order in which the elementary reflectors are
+*> multiplied to form the block reflector:
+*> = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*> = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*> STOREV is CHARACTER*1
+*> Specifies how the vectors which define the elementary
+*> reflectors are stored (see also Further Details):
+*> = 'C': columnwise
+*> = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> The order of the triangular factor T (= the number of
+*> elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*> V is REAL array, dimension
+*> (LDV,K) if STOREV = 'C'
+*> (LDV,N) if STOREV = 'R'
+*> The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*> LDV is INTEGER
+*> The leading dimension of the array V.
+*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*> TAU is REAL array, dimension (K)
+*> TAU(i) must contain the scalar factor of the elementary
+*> reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*> T is REAL array, dimension (LDT,K)
+*> The k by k triangular factor T of the block reflector.
+*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*> lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*> LDT is INTEGER
+*> The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> The shape of the matrix V and the storage of the vectors which define
+*> the H(i) is best illustrated by the following example with n = 5 and
+*> k = 3. The elements equal to 1 are not stored.
+*>
+*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
+*>
+*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
+*> ( v1 1 ) ( 1 v2 v2 v2 )
+*> ( v1 v2 1 ) ( 1 v3 v3 )
+*> ( v1 v2 v3 )
+*> ( v1 v2 v3 )
+*>
+*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
+*>
+*> V = ( v1 v2 v3 ) V = ( v1 v1 1 )
+*> ( v1 v2 v3 ) ( v2 v2 v2 1 )
+*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
+*> ( 1 v3 )
+*> ( 1 )
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+ $ T, LDT )
+*
+* -- LAPACK auxiliary routine --
+* -- LAPACK is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+* .. Scalar Arguments ..
+ CHARACTER DIRECT, STOREV
+ INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+ REAL T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE, ZERO
+ PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, J, PREVLASTV, LASTV
+* ..
+* .. External Subroutines ..
+ EXTERNAL SGEMV, STRMV
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. Executable Statements ..
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+ IF( LSAME( DIRECT, 'F' ) ) THEN
+ PREVLASTV = N
+ DO I = 1, K
+ PREVLASTV = MAX( I, PREVLASTV )
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = 1, I
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * V( I , J )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
+*
+ CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ),
+ $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
+ $ T( 1, I ), 1 )
+ ELSE
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * V( J , I )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
+*
+ CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ),
+ $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+ $ ONE, T( 1, I ), 1 )
+ END IF
+*
+* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+ CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+ $ T,
+ $ LDT, T( 1, I ), 1 )
+ T( I, I ) = TAU( I )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MAX( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ END DO
+ ELSE
+ PREVLASTV = 1
+ DO I = K, 1, -1
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = I, K
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( I.LT.K ) THEN
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * V( N-K+I , J )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
+*
+ CALL SGEMV( 'Transpose', N-K+I-J, K-I,
+ $ -TAU( I ),
+ $ V( J, I+1 ), LDV, V( J, I ), 1, ONE,
+ $ T( I+1, I ), 1 )
+ ELSE
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * V( J, N-K+I )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
+*
+ CALL SGEMV( 'No transpose', K-I, N-K+I-J,
+ $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
+ $ ONE, T( I+1, I ), 1 )
+ END IF
+*
+* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+ CALL STRMV( 'Lower', 'No transpose', 'Non-unit',
+ $ K-I,
+ $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MIN( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ T( I, I ) = TAU( I )
+ END IF
+ END DO
+ END IF
+ RETURN
+*
+* End of SLARFT_LVL2
+*
+ END
diff --git a/lapack-netlib/SRC/zlarft.c b/lapack-netlib/SRC/zlarft.c
index f8ca30b318..404f96358c 100644
--- a/lapack-netlib/SRC/zlarft.c
+++ b/lapack-netlib/SRC/zlarft.c
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -420,16 +419,16 @@ static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integ
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH */
@@ -539,7 +539,6 @@ static integer c__1 = 1;
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
-/* > \htmlonly */
/* > Download ZLARFT + dependencies */
/* > */
@@ -550,12 +549,11 @@ f"> */
/* > */
/* > [TXT] */
-/* > \endhtmlonly */
/* Definition: */
/* =========== */
-/* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
/* CHARACTER DIRECT, STOREV */
/* INTEGER K, LDT, LDV, N */
@@ -663,9 +661,7 @@ f"> */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
-/* > \date June 2016 */
-
-/* > \ingroup complex16OTHERauxiliary */
+/* > \ingroup larft */
/* > \par Further Details: */
/* ===================== */
@@ -694,257 +690,542 @@ f"> */
/* > \endverbatim */
/* > */
/* ===================================================================== */
-/* Subroutine */ void zlarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer *
k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex *
t, integer *ldt)
{
/* System generated locals */
- integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
- doublecomplex z__1, z__2, z__3;
+ address a__1[2];
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2[2], i__3, i__4;
+ doublecomplex z__1;
+ char ch__1[2];
/* Local variables */
- integer i__, j;
+ integer i__, j, l;
+ logical lq, ql, qr;
+ integer nx;
+ extern /* Subroutine */ int zlarft_lvl2__(char *, char *, integer *,
+ integer *, doublecomplex *, integer *, doublecomplex *,
+ doublecomplex *, integer *);
+ logical dirf, colv;
extern logical lsame_(char *, char *);
- extern /* Subroutine */ void zgemm_(char *, char *, integer *, integer *,
+ extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
integer *, doublecomplex *, doublecomplex *, integer *,
doublecomplex *, integer *, doublecomplex *, doublecomplex *,
- integer *), zgemv_(char *, integer *, integer *,
- doublecomplex *, doublecomplex *, integer *, doublecomplex *,
- integer *, doublecomplex *, doublecomplex *, integer *);
- integer lastv;
- extern /* Subroutine */ void ztrmv_(char *, char *, char *, integer *,
+ integer *), ztrmm_(char *, char *, char *, char *,
+ integer *, integer *, doublecomplex *, doublecomplex *, integer *
+ , doublecomplex *, integer *);
+ extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+ integer *, integer *, ftnlen, ftnlen);
+ extern /* Subroutine */ int zlacpy_(char *, integer *, integer *,
doublecomplex *, integer *, doublecomplex *, integer *);
- integer prevlastv;
- extern /* Subroutine */ void mecago_();
-/* -- LAPACK auxiliary routine (version 3.7.0) -- */
+/* -- LAPACK auxiliary routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/* June 2016 */
-/* ===================================================================== */
+
+
+
+
+
+
+
+
+
+
+/* The general scheme used is inspired by the approach inside DGEQRT3 */
+/* which was (at the time of writing this code): */
+/* Based on the algorithm of Elmroth and Gustavson, */
+/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
/* Quick return if possible */
/* Parameter adjustments */
v_dim1 = *ldv;
- v_offset = 1 + v_dim1 * 1;
+ v_offset = 1 + v_dim1;
v -= v_offset;
--tau;
t_dim1 = *ldt;
- t_offset = 1 + t_dim1 * 1;
+ t_offset = 1 + t_dim1;
t -= t_offset;
/* Function Body */
- if (*n == 0) {
- return;
+ if (*n == 0 || *k == 0) {
+ return 0;
+ }
+
+/* Base case */
+
+ if (*n == 1 || *k == 1) {
+ i__1 = t_dim1 + 1;
+ t[i__1].r = tau[1].r, t[i__1].i = tau[1].i;
+ return 0;
}
- if (lsame_(direct, "F")) {
- prevlastv = *n;
- i__1 = *k;
- for (i__ = 1; i__ <= i__1; ++i__) {
- prevlastv = f2cmax(prevlastv,i__);
- i__2 = i__;
- if (tau[i__2].r == 0. && tau[i__2].i == 0.) {
+/* Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+ i__2[0] = 1, a__1[0] = direct;
+ i__2[1] = 1, a__1[1] = storev;
+ s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+ nx = ilaenv_(&c__3, "ZLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+ ftnlen)2);
+ if (*k < nx) {
+ zlarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+ return 0;
+ }
-/* H(i) = I */
+/* Beginning of executable statements */
- i__2 = i__;
- for (j = 1; j <= i__2; ++j) {
- i__3 = j + i__ * t_dim1;
- t[i__3].r = 0., t[i__3].i = 0.;
- }
- } else {
-
-/* general case */
-
- if (lsame_(storev, "C")) {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- i__3 = lastv + i__ * v_dim1;
- if (v[i__3].r != 0. || v[i__3].i != 0.) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- i__3 = j + i__ * t_dim1;
- i__4 = i__;
- z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
- d_cnjg(&z__3, &v[i__ + j * v_dim1]);
- z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i =
- z__2.r * z__3.i + z__2.i * z__3.r;
- t[i__3].r = z__1.r, t[i__3].i = z__1.i;
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
-
- i__2 = j - i__;
- i__3 = i__ - 1;
- i__4 = i__;
- z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
- zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__
- + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
- c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
- } else {
-/* Skip any trailing zeros. */
- i__2 = i__ + 1;
- for (lastv = *n; lastv >= i__2; --lastv) {
- i__3 = i__ + lastv * v_dim1;
- if (v[i__3].r != 0. || v[i__3].i != 0.) {
- myexit_();
- }
- }
- i__2 = i__ - 1;
- for (j = 1; j <= i__2; ++j) {
- i__3 = j + i__ * t_dim1;
- i__4 = i__;
- z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
- i__5 = j + i__ * v_dim1;
- z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i,
- z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5]
- .r;
- t[i__3].r = z__1.r, t[i__3].i = z__1.i;
- }
- j = f2cmin(lastv,prevlastv);
-
-/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
-
- i__2 = i__ - 1;
- i__3 = j - i__;
- i__4 = i__;
- z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
- zgemm_("N", "C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1)
- * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
- ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
- }
+ l = *k / 2;
-/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
-
- i__2 = i__ - 1;
- ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
- t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
- i__2 = i__ + i__ * t_dim1;
- i__3 = i__;
- t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
- if (i__ > 1) {
- prevlastv = f2cmax(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
+/* Determine what kind of Q we need to compute */
+/* We assume that if the user doesn't provide 'F' for DIRECT, */
+/* then they meant to provide 'B' and if they don't provide */
+/* 'C' for STOREV, then they meant to provide 'R' */
+
+ dirf = lsame_(direct, "F");
+ colv = lsame_(storev, "C");
+
+/* QR happens when we have forward direction in column storage */
+
+ qr = dirf && colv;
+
+/* LQ happens when we have forward direction in row storage */
+
+ lq = dirf && ! colv;
+
+/* QL happens when we have backward direction in column storage */
+
+ ql = ! dirf && colv;
+
+/* The last case is RQ. Due to how we structured this, if the */
+/* above 3 are false, then RQ must be true, so we never store */
+/* this */
+/* RQ happens when we have backward direction in row storage */
+/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+ if (qr) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} 0 | */
+/* |V_{2,1} V_{2,2}| */
+/* |V_{3,1} V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\C^{l,l} unit lower triangular */
+/* V_{2,1}\in\C^{k-l,l} rectangular */
+/* V_{3,1}\in\C^{n-k,l} rectangular */
+
+/* V_{2,2}\in\C^{k-l,k-l} unit lower triangular */
+/* V_{3,2}\in\C^{n-k,k-l} rectangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{l, l} upper triangular */
+/* T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\C^{l, k-l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ zlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ zlarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{2,1}' */
+
+ i__1 = l;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = *k - l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ i__4 = j + (l + i__) * t_dim1;
+ d_cnjg(&z__1, &v[l + i__ + j * v_dim1]);
+ t[i__4].r = z__1.r, t[i__4].i = z__1.i;
}
}
- } else {
- prevlastv = 1;
- for (i__ = *k; i__ >= 1; --i__) {
- i__1 = i__;
- if (tau[i__1].r == 0. && tau[i__1].i == 0.) {
-/* H(i) = I */
+/* T_{1,2} = T_{1,2}*V_{2,2} */
- i__1 = *k;
- for (j = i__; j <= i__1; ++j) {
- i__2 = j + i__ * t_dim1;
- t[i__2].r = 0., t[i__2].i = 0.;
- }
- } else {
-
-/* general case */
-
- if (i__ < *k) {
- if (lsame_(storev, "C")) {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- i__2 = lastv + i__ * v_dim1;
- if (v[i__2].r != 0. || v[i__2].i != 0.) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- i__2 = j + i__ * t_dim1;
- i__3 = i__;
- z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
- d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]);
- z__1.r = z__2.r * z__3.r - z__2.i * z__3.i,
- z__1.i = z__2.r * z__3.i + z__2.i *
- z__3.r;
- t[i__2].r = z__1.r, t[i__2].i = z__1.i;
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
-
- i__1 = *n - *k + i__ - j;
- i__2 = *k - i__;
- i__3 = i__;
- z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
- zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[
- j + (i__ + 1) * v_dim1], ldv, &v[j + i__ *
- v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ *
- t_dim1], &c__1);
- } else {
-/* Skip any leading zeros. */
- i__1 = i__ - 1;
- for (lastv = 1; lastv <= i__1; ++lastv) {
- i__2 = i__ + lastv * v_dim1;
- if (v[i__2].r != 0. || v[i__2].i != 0.) {
- myexit_();
- }
- }
- i__1 = *k;
- for (j = i__ + 1; j <= i__1; ++j) {
- i__2 = j + i__ * t_dim1;
- i__3 = i__;
- z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
- i__4 = j + (*n - *k + i__) * v_dim1;
- z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i,
- z__1.i = z__2.r * v[i__4].i + z__2.i * v[
- i__4].r;
- t[i__2].r = z__1.r, t[i__2].i = z__1.i;
- }
- j = f2cmax(lastv,prevlastv);
-
-/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
-
- i__1 = *k - i__;
- i__2 = *n - *k + i__ - j;
- i__3 = i__;
- z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
- zgemm_("N", "C", &i__1, &c__1, &i__2, &z__1, &v[i__ +
- 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
- ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
- }
-
-/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
- i__1 = *k - i__;
- ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
- + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
- t_dim1], &c__1)
- ;
- if (i__ > 1) {
- prevlastv = f2cmin(prevlastv,lastv);
- } else {
- prevlastv = lastv;
- }
- }
- i__1 = i__ + i__ * t_dim1;
- i__2 = i__;
- t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+ i__1 = *k - l;
+ ztrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+ l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ zgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[*k +
+ 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b1, &
+ t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1'*V_2 */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__1 = *k - l;
+ ztrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1],
+ ldt);
+ } else if (lq) {
+
+/* Break V apart into 6 components */
+
+/* V = |----------------------| */
+/* |V_{1,1} V_{1,2} V{1,3}| */
+/* |0 V_{2,2} V{2,3}| */
+/* |----------------------| */
+
+/* V_{1,1}\in\C^{l,l} unit upper triangular */
+/* V_{1,2}\in\C^{l,k-l} rectangular */
+/* V_{1,3}\in\C^{l,n-k} rectangular */
+
+/* V_{2,2}\in\C^{k-l,k-l} unit upper triangular */
+/* V_{2,3}\in\C^{k-l,n-k} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} T_{1,2}| */
+/* |0 T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{l, l} upper triangular */
+/* T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/* T_{1,2}\in\C^{l, k-l} rectangular */
+
+/* Then, consider the product: */
+
+/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{1,2} */
+
+/* Compute T_{1,1} recursively */
+
+ zlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ zlarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1],
+ ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/* Compute T_{1,2} */
+/* T_{1,2} = V_{1,2} */
+
+ i__1 = *k - l;
+ zlacpy_("All", &l, &i__1, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) *
+ t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*V_{2,2}' */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Upper", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[l
+ + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ zgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[(*k +
+ 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+ c_b1, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* At this point, we have that T_{1,2} = V_1*V_2' */
+/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/* respectively. */
+
+/* T_{1,2} = -T_{1,1}*T_{1,2} */
+
+ i__1 = *k - l;
+ ztrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3,
+ &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/* T_{1,2} = T_{1,2}*T_{2,2} */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1],
+ ldt);
+ } else if (ql) {
+
+/* Break V apart into 6 components */
+
+/* V = |---------------| */
+/* |V_{1,1} V_{1,2}| */
+/* |V_{2,1} V_{2,2}| */
+/* |0 V_{3,2}| */
+/* |---------------| */
+
+/* V_{1,1}\in\C^{n-k,k-l} rectangular */
+/* V_{2,1}\in\C^{k-l,k-l} unit upper triangular */
+
+/* V_{1,2}\in\C^{n-k,l} rectangular */
+/* V_{2,2}\in\C^{k-l,l} rectangular */
+/* V_{3,2}\in\C^{l,l} unit upper triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\C^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\C^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |-------| */
+/* |V_1 V_2| */
+/* |-------| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V*T*V' */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ zlarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ zlarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+ tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2}' */
+
+ i__1 = *k - l;
+ for (j = 1; j <= i__1; ++j) {
+ i__3 = l;
+ for (i__ = 1; i__ <= i__3; ++i__) {
+ i__4 = *k - l + i__ + j * t_dim1;
+ d_cnjg(&z__1, &v[*n - *k + j + (*k - l + i__) * v_dim1]);
+ t[i__4].r = z__1.r, t[i__4].i = z__1.i;
}
}
- }
- return;
-/* End of ZLARFT */
+/* T_{2,1} = T_{2,1}*V_{2,1} */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+ *n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ zgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[(*k -
+ l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b1, &t[*k -
+ l + 1 + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2'*V_1 */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__1 = *k - l;
+ ztrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b3,
+ &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ } else {
+
+/* Else means RQ case */
+
+/* Break V apart into 6 components */
+
+/* V = |-----------------------| */
+/* |V_{1,1} V_{1,2} 0 | */
+/* |V_{2,1} V_{2,2} V_{2,3}| */
+/* |-----------------------| */
+
+/* V_{1,1}\in\C^{k-l,n-k} rectangular */
+/* V_{1,2}\in\C^{k-l,k-l} unit lower triangular */
+
+/* V_{2,1}\in\C^{l,n-k} rectangular */
+/* V_{2,2}\in\C^{l,k-l} rectangular */
+/* V_{2,3}\in\C^{l,l} unit lower triangular */
+
+/* We will construct the T matrix */
+/* T = |---------------| */
+/* |T_{1,1} 0 | */
+/* |T_{2,1} T_{2,2}| */
+/* |---------------| */
+
+/* T is the triangular factor obtained from block reflectors. */
+/* To motivate the structure, assume we have already computed T_{1,1} */
+/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/* T_{2,2}\in\C^{l, l} non-unit lower triangular */
+/* T_{2,1}\in\C^{k-l, l} rectangular */
+
+/* Where l = floor(k/2) */
+
+/* Then, consider the product: */
+
+/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/* Then, we can define the matrix V as */
+/* V = |---| */
+/* |V_1| */
+/* |V_2| */
+/* |---| */
+
+/* So, our product is equivalent to the matrix product */
+/* I - V'*T*V */
+/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/* to compute T_{2,1} */
+
+/* Compute T_{1,1} recursively */
+
+ i__1 = *n - l;
+ i__3 = *k - l;
+ zlarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+ t_offset], ldt);
+
+/* Compute T_{2,2} recursively */
+
+ zlarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k
+ - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/* Compute T_{2,1} */
+/* T_{2,1} = V_{2,2} */
+
+ i__1 = *k - l;
+ zlacpy_("All", &l, &i__1, &v[*k - l + 1 + (*n - *k + 1) * v_dim1],
+ ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*V_{1,2}' */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Lower", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[(*
+ n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/* Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+ i__1 = *k - l;
+ i__3 = *n - *k;
+ zgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[*k -
+ l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - l + 1
+ + t_dim1], ldt);
+
+/* At this point, we have that T_{2,1} = V_2*V_1' */
+/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/* respectively. */
+
+/* T_{2,1} = -T_{2,2}*T_{2,1} */
+
+ i__1 = *k - l;
+ ztrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b3, &
+ t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 +
+ t_dim1], ldt);
+
+/* T_{2,1} = T_{2,1}*T_{1,1} */
+
+ i__1 = *k - l;
+ ztrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b1,
+ &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+ }
+ return 0;
} /* zlarft_ */
diff --git a/lapack-netlib/SRC/zlarft.f b/lapack-netlib/SRC/zlarft.f
index 900795afad..efd52037d3 100644
--- a/lapack-netlib/SRC/zlarft.f
+++ b/lapack-netlib/SRC/zlarft.f
@@ -5,7 +5,6 @@
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
-*> \htmlonly
*> Download ZLARFT + dependencies
*>
*> [TGZ]
@@ -13,7 +12,6 @@
*> [ZIP]
*>
*> [TXT]
-*> \endhtmlonly
*
* Definition:
* ===========
@@ -161,6 +159,7 @@
* =====================================================================
RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
$ TAU, T, LDT )
+ IMPLICIT NONE
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -179,11 +178,12 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
* .. Parameters ..
*
COMPLEX*16 ONE, NEG_ONE, ZERO
- PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0)
+ PARAMETER(ONE=(1.0D+0,0.0D+0), ZERO = (0.0D+0,0.0D+0),
+ $ NEG_ONE=(-1.0D+0,0.0D+0))
*
* .. Local Scalars ..
*
- INTEGER I,J,L
+ INTEGER I,J,L,NX
LOGICAL QR,LQ,QL,DIRF,COLV
*
* .. External Subroutines ..
@@ -193,7 +193,8 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
* .. External Functions..
*
LOGICAL LSAME
- EXTERNAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
*
* .. Intrinsic Functions..
*
@@ -219,6 +220,14 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
RETURN
END IF
*
+* Determine when to cross over into the level 2 based implementation
+*
+ NX = ILAENV(3, "ZLARFT", DIRECT // STOREV, N, K, -1, -1)
+ IF(K.LT.NX) THEN
+ CALL ZLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+ RETURN
+ END IF
+*
* Beginning of executable statements
*
L = K / 2
diff --git a/lapack-netlib/SRC/zlarft_lvl2.c b/lapack-netlib/SRC/zlarft_lvl2.c
new file mode 100644
index 0000000000..193870627b
--- /dev/null
+++ b/lapack-netlib/SRC/zlarft_lvl2.c
@@ -0,0 +1,941 @@
+#include
+#include
+#include
+#include
+#include
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{ flag cierr;
+ ftnint ciunit;
+ flag ciend;
+ char *cifmt;
+ ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{ flag icierr;
+ char *iciunit;
+ flag iciend;
+ char *icifmt;
+ ftnint icirlen;
+ ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{ flag oerr;
+ ftnint ounit;
+ char *ofnm;
+ ftnlen ofnmlen;
+ char *osta;
+ char *oacc;
+ char *ofm;
+ ftnint orl;
+ char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{ flag cerr;
+ ftnint cunit;
+ char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{ flag aerr;
+ ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{ flag inerr;
+ ftnint inunit;
+ char *infile;
+ ftnlen infilen;
+ ftnint *inex; /*parameters in standard's order*/
+ ftnint *inopen;
+ ftnint *innum;
+ ftnint *innamed;
+ char *inname;
+ ftnlen innamlen;
+ char *inacc;
+ ftnlen inacclen;
+ char *inseq;
+ ftnlen inseqlen;
+ char *indir;
+ ftnlen indirlen;
+ char *infmt;
+ ftnlen infmtlen;
+ char *inform;
+ ftnint informlen;
+ char *inunf;
+ ftnlen inunflen;
+ ftnint *inrecl;
+ ftnint *innrec;
+ char *inblank;
+ ftnlen inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype { /* for multiple entry points */
+ integer1 g;
+ shortint h;
+ integer i;
+ /* longint j; */
+ real r;
+ doublereal d;
+ complex c;
+ doublecomplex z;
+ };
+
+typedef union Multitype Multitype;
+
+struct Vardesc { /* for Namelist */
+ char *name;
+ char *addr;
+ ftnlen *dims;
+ int type;
+ };
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+ char *name;
+ Vardesc **vars;
+ int nvars;
+ };
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b) ((a) >> (b) & 1)
+#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+ float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static double dpow_ui(double x, integer n) {
+ double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+ complex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+ for(u = n; ; ) {
+ if(u & 01) pow.r *= x.r, pow.i *= x.i;
+ if(u >>= 1) x.r *= x.r, x.i *= x.i;
+ else break;
+ }
+ }
+ _Fcomplex p={pow.r, pow.i};
+ return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+ _Complex float pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+ _Dcomplex pow={1.0,0.0}; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+ for(u = n; ; ) {
+ if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+ if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+ else break;
+ }
+ }
+ _Dcomplex p = {pow._Val[0], pow._Val[1]};
+ return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+ _Complex double pow=1.0; unsigned long int u;
+ if(n != 0) {
+ if(n < 0) n = -n, x = 1/x;
+ for(u = n; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+ integer pow; unsigned long int u;
+ if (n <= 0) {
+ if (n == 0 || x == 1) pow = 1;
+ else if (x != -1) pow = x == 0 ? 1/x : 0;
+ else n = -n;
+ }
+ if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+ u = n;
+ for(pow = 1; ; ) {
+ if(u & 01) pow *= x;
+ if(u >>= 1) x *= x;
+ else break;
+ }
+ }
+ return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+ double m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+ float m; integer i, mi;
+ for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+ if (w[i-1]>m) mi=i ,m=w[i-1];
+ return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+ integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+ _Fcomplex zdotc = {0.0, 0.0};
+ if (incx == 1 && incy == 1) {
+ for (i=0;i \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT. */
+
+/* =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/* http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download ZLARFT_LVL2 + dependencies */
+/* > */
+/* > [TGZ] */
+/* > */
+/* > [ZIP] */
+/* > */
+/* > [TXT] */
+
+/* Definition: */
+/* =========== */
+
+/* SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/* T, LDT ) */
+
+/* CHARACTER DIRECT, STOREV */
+/* INTEGER K, LDT, LDV, N */
+/* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/* ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* > H = I - V * T * V**H */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* > H = I - V**H * T * V */
+/* > \endverbatim */
+
+/* Arguments: */
+/* ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* > DIRECT is CHARACTER*1 */
+/* > Specifies the order in which the elementary reflectors are */
+/* > multiplied to form the block reflector: */
+/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* > STOREV is CHARACTER*1 */
+/* > Specifies how the vectors which define the elementary */
+/* > reflectors are stored (see also Further Details): */
+/* > = 'C': columnwise */
+/* > = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* > N is INTEGER */
+/* > The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* > K is INTEGER */
+/* > The order of the triangular factor T (= the number of */
+/* > elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* > V is COMPLEX*16 array, dimension */
+/* > (LDV,K) if STOREV = 'C' */
+/* > (LDV,N) if STOREV = 'R' */
+/* > The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* > LDV is INTEGER */
+/* > The leading dimension of the array V. */
+/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* > TAU is COMPLEX*16 array, dimension (K) */
+/* > TAU(i) must contain the scalar factor of the elementary */
+/* > reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* > T is COMPLEX*16 array, dimension (LDT,K) */
+/* > The k by k triangular factor T of the block reflector. */
+/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* > lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* > LDT is INTEGER */
+/* > The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/* Authors: */
+/* ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/* ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The shape of the matrix V and the storage of the vectors which define */
+/* > the H(i) is best illustrated by the following example with n = 5 and */
+/* > k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */
+/* > ( v1 1 ) ( 1 v2 v2 v2 ) */
+/* > ( v1 v2 1 ) ( 1 v3 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > ( v1 v2 v3 ) */
+/* > */
+/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */
+/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */
+/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */
+/* > ( 1 v3 ) */
+/* > ( 1 ) */
+/* > \endverbatim */
+/* > */
+/* ===================================================================== */
+/* Subroutine */ int zlarft_lvl2__(char *direct, char *storev, integer *n,
+ integer *k, doublecomplex *v, integer *ldv, doublecomplex *tau,
+ doublecomplex *t, integer *ldt)
+{
+ /* System generated locals */
+ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
+ doublecomplex z__1, z__2, z__3;
+
+ /* Local variables */
+ integer i__, j, prevlastv;
+ extern logical lsame_(char *, char *);
+ extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+ integer *, doublecomplex *, doublecomplex *, integer *,
+ doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+ integer *), zgemv_(char *, integer *, integer *,
+ doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+ integer *, doublecomplex *, doublecomplex *, integer *);
+ integer lastv;
+ extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *,
+ doublecomplex *, integer *, doublecomplex *, integer *), mecago_();
+
+
+/* -- LAPACK auxiliary routine -- */
+/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
+/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/* ===================================================================== */
+
+
+/* Quick return if possible */
+
+ /* Parameter adjustments */
+ v_dim1 = *ldv;
+ v_offset = 1 + v_dim1;
+ v -= v_offset;
+ --tau;
+ t_dim1 = *ldt;
+ t_offset = 1 + t_dim1;
+ t -= t_offset;
+
+ /* Function Body */
+ if (*n == 0) {
+ return 0;
+ }
+
+ if (lsame_(direct, "F")) {
+ prevlastv = *n;
+ i__1 = *k;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ prevlastv = f2cmax(prevlastv,i__);
+ i__2 = i__;
+ if (tau[i__2].r == 0. && tau[i__2].i == 0.) {
+
+/* H(i) = I */
+
+ i__2 = i__;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = j + i__ * t_dim1;
+ t[i__3].r = 0., t[i__3].i = 0.;
+ }
+ } else {
+
+/* general case */
+
+ if (lsame_(storev, "C")) {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ i__3 = lastv + i__ * v_dim1;
+ if (v[i__3].r != 0. || v[i__3].i != 0.) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = j + i__ * t_dim1;
+ i__4 = i__;
+ z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
+ d_cnjg(&z__3, &v[i__ + j * v_dim1]);
+ z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i =
+ z__2.r * z__3.i + z__2.i * z__3.r;
+ t[i__3].r = z__1.r, t[i__3].i = z__1.i;
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
+
+ i__2 = j - i__;
+ i__3 = i__ - 1;
+ i__4 = i__;
+ z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
+ zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__
+ + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
+ c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
+ } else {
+/* Skip any trailing zeros. */
+ i__2 = i__ + 1;
+ for (lastv = *n; lastv >= i__2; --lastv) {
+ i__3 = i__ + lastv * v_dim1;
+ if (v[i__3].r != 0. || v[i__3].i != 0.) {
+ myexit_();
+ }
+ }
+ i__2 = i__ - 1;
+ for (j = 1; j <= i__2; ++j) {
+ i__3 = j + i__ * t_dim1;
+ i__4 = i__;
+ z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
+ i__5 = j + i__ * v_dim1;
+ z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i,
+ z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5]
+ .r;
+ t[i__3].r = z__1.r, t[i__3].i = z__1.i;
+ }
+ j = f2cmin(lastv,prevlastv);
+
+/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
+
+ i__2 = i__ - 1;
+ i__3 = j - i__;
+ i__4 = i__;
+ z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
+ zgemm_("N", "C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1)
+ * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1],
+ ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
+ }
+
+/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+ i__2 = i__ - 1;
+ ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+ i__2 = i__ + i__ * t_dim1;
+ i__3 = i__;
+ t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+ if (i__ > 1) {
+ prevlastv = f2cmax(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ }
+ } else {
+ prevlastv = 1;
+ for (i__ = *k; i__ >= 1; --i__) {
+ i__1 = i__;
+ if (tau[i__1].r == 0. && tau[i__1].i == 0.) {
+
+/* H(i) = I */
+
+ i__1 = *k;
+ for (j = i__; j <= i__1; ++j) {
+ i__2 = j + i__ * t_dim1;
+ t[i__2].r = 0., t[i__2].i = 0.;
+ }
+ } else {
+
+/* general case */
+
+ if (i__ < *k) {
+ if (lsame_(storev, "C")) {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ i__2 = lastv + i__ * v_dim1;
+ if (v[i__2].r != 0. || v[i__2].i != 0.) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ i__2 = j + i__ * t_dim1;
+ i__3 = i__;
+ z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
+ d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]);
+ z__1.r = z__2.r * z__3.r - z__2.i * z__3.i,
+ z__1.i = z__2.r * z__3.i + z__2.i *
+ z__3.r;
+ t[i__2].r = z__1.r, t[i__2].i = z__1.i;
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
+
+ i__1 = *n - *k + i__ - j;
+ i__2 = *k - i__;
+ i__3 = i__;
+ z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+ zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[
+ j + (i__ + 1) * v_dim1], ldv, &v[j + i__ *
+ v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ *
+ t_dim1], &c__1);
+ } else {
+/* Skip any leading zeros. */
+ i__1 = i__ - 1;
+ for (lastv = 1; lastv <= i__1; ++lastv) {
+ i__2 = i__ + lastv * v_dim1;
+ if (v[i__2].r != 0. || v[i__2].i != 0.) {
+ myexit_();
+ }
+ }
+ i__1 = *k;
+ for (j = i__ + 1; j <= i__1; ++j) {
+ i__2 = j + i__ * t_dim1;
+ i__3 = i__;
+ z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
+ i__4 = j + (*n - *k + i__) * v_dim1;
+ z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i,
+ z__1.i = z__2.r * v[i__4].i + z__2.i * v[
+ i__4].r;
+ t[i__2].r = z__1.r, t[i__2].i = z__1.i;
+ }
+ j = f2cmax(lastv,prevlastv);
+
+/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
+
+ i__1 = *k - i__;
+ i__2 = *n - *k + i__ - j;
+ i__3 = i__;
+ z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+ zgemm_("N", "C", &i__1, &c__1, &i__2, &z__1, &v[i__ +
+ 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
+ ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
+ }
+
+/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+ i__1 = *k - i__;
+ ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+ t_dim1], &c__1)
+ ;
+ if (i__ > 1) {
+ prevlastv = f2cmin(prevlastv,lastv);
+ } else {
+ prevlastv = lastv;
+ }
+ }
+ i__1 = i__ + i__ * t_dim1;
+ i__2 = i__;
+ t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+ }
+ }
+ }
+ return 0;
+
+/* End of ZLARFT_LVL2 */
+
+} /* zlarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/zlarft_lvl2.f b/lapack-netlib/SRC/zlarft_lvl2.f
new file mode 100644
index 0000000000..808c7fdb25
--- /dev/null
+++ b/lapack-netlib/SRC/zlarft_lvl2.f
@@ -0,0 +1,327 @@
+*> \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT.
+*
+* =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+* http://www.netlib.org/lapack/explore-html/
+*
+*> Download ZLARFT_LVL2 + dependencies
+*>
+*> [TGZ]
+*>
+*> [ZIP]
+*>
+*> [TXT]
+*
+* Definition:
+* ===========
+*
+* SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+* T, LDT )
+*
+* .. Scalar Arguments ..
+* CHARACTER DIRECT, STOREV
+* INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+*
+*> \par Purpose:
+* =============
+*>
+*> \verbatim
+*>
+*> ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*> H = I - V * T * V**H
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*> H = I - V**H * T * V
+*> \endverbatim
+*
+* Arguments:
+* ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*> DIRECT is CHARACTER*1
+*> Specifies the order in which the elementary reflectors are
+*> multiplied to form the block reflector:
+*> = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*> = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*> STOREV is CHARACTER*1
+*> Specifies how the vectors which define the elementary
+*> reflectors are stored (see also Further Details):
+*> = 'C': columnwise
+*> = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*> N is INTEGER
+*> The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*> K is INTEGER
+*> The order of the triangular factor T (= the number of
+*> elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*> V is COMPLEX*16 array, dimension
+*> (LDV,K) if STOREV = 'C'
+*> (LDV,N) if STOREV = 'R'
+*> The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*> LDV is INTEGER
+*> The leading dimension of the array V.
+*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*> TAU is COMPLEX*16 array, dimension (K)
+*> TAU(i) must contain the scalar factor of the elementary
+*> reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*> T is COMPLEX*16 array, dimension (LDT,K)
+*> The k by k triangular factor T of the block reflector.
+*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*> lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*> LDT is INTEGER
+*> The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+* Authors:
+* ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+* =====================
+*>
+*> \verbatim
+*>
+*> The shape of the matrix V and the storage of the vectors which define
+*> the H(i) is best illustrated by the following example with n = 5 and
+*> k = 3. The elements equal to 1 are not stored.
+*>
+*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
+*>
+*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
+*> ( v1 1 ) ( 1 v2 v2 v2 )
+*> ( v1 v2 1 ) ( 1 v3 v3 )
+*> ( v1 v2 v3 )
+*> ( v1 v2 v3 )
+*>
+*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
+*>
+*> V = ( v1 v2 v3 ) V = ( v1 v1 1 )
+*> ( v1 v2 v3 ) ( v2 v2 v2 1 )
+*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
+*> ( 1 v3 )
+*> ( 1 )
+*> \endverbatim
+*>
+* =====================================================================
+ SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+ $ T, LDT )
+*
+* -- LAPACK auxiliary routine --
+* -- LAPACK is a software package provided by Univ. of Tennessee, --
+* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+* .. Scalar Arguments ..
+ CHARACTER DIRECT, STOREV
+ INTEGER K, LDT, LDV, N
+* ..
+* .. Array Arguments ..
+ COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * )
+* ..
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX*16 ONE, ZERO
+ PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ),
+ $ ZERO = ( 0.0D+0, 0.0D+0 ) )
+* ..
+* .. Local Scalars ..
+ INTEGER I, J, PREVLASTV, LASTV
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZGEMV, ZTRMV, ZGEMM
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ EXTERNAL LSAME
+* ..
+* .. Executable Statements ..
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+ IF( LSAME( DIRECT, 'F' ) ) THEN
+ PREVLASTV = N
+ DO I = 1, K
+ PREVLASTV = MAX( PREVLASTV, I )
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = 1, I
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
+*
+ CALL ZGEMV( 'Conjugate transpose', J-I, I-1,
+ $ -TAU( I ), V( I+1, 1 ), LDV,
+ $ V( I+1, I ), 1, ONE, T( 1, I ), 1 )
+ ELSE
+* Skip any trailing zeros.
+ DO LASTV = N, I+1, -1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = 1, I-1
+ T( J, I ) = -TAU( I ) * V( J , I )
+ END DO
+ J = MIN( LASTV, PREVLASTV )
+*
+* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
+*
+ CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
+ $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+ $ ONE, T( 1, I ), LDT )
+ END IF
+*
+* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+ CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+ $ T,
+ $ LDT, T( 1, I ), 1 )
+ T( I, I ) = TAU( I )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MAX( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ END DO
+ ELSE
+ PREVLASTV = 1
+ DO I = K, 1, -1
+ IF( TAU( I ).EQ.ZERO ) THEN
+*
+* H(i) = I
+*
+ DO J = I, K
+ T( J, I ) = ZERO
+ END DO
+ ELSE
+*
+* general case
+*
+ IF( I.LT.K ) THEN
+ IF( LSAME( STOREV, 'C' ) ) THEN
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( LASTV, I ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
+*
+ CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I,
+ $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
+ $ 1, ONE, T( I+1, I ), 1 )
+ ELSE
+* Skip any leading zeros.
+ DO LASTV = 1, I-1
+ IF( V( I, LASTV ).NE.ZERO ) EXIT
+ END DO
+ DO J = I+1, K
+ T( J, I ) = -TAU( I ) * V( J, N-K+I )
+ END DO
+ J = MAX( LASTV, PREVLASTV )
+*
+* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
+*
+ CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J,
+ $ -TAU( I ),
+ $ V( I+1, J ), LDV, V( I, J ), LDV,
+ $ ONE, T( I+1, I ), LDT )
+ END IF
+*
+* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+ CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit',
+ $ K-I,
+ $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+ IF( I.GT.1 ) THEN
+ PREVLASTV = MIN( PREVLASTV, LASTV )
+ ELSE
+ PREVLASTV = LASTV
+ END IF
+ END IF
+ T( I, I ) = TAU( I )
+ END IF
+ END DO
+ END IF
+ RETURN
+*
+* End of ZLARFT_LVL2
+*
+ END