diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 04dd49cfbe..bf346a1b5b 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -70,8 +70,8 @@ set(SLASRC slaqgb.f slaqge.f slaqp2.f slaqps.f slaqp2rk.f slaqp3rk.f slaqsb.f slaqsp.f slaqsy.f slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f - slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f - slarf1f.f slarf1l.f slarrv.f slartv.f + slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarft_lvl2.f slarfx.f + slarfy.f slargv.f slarf1f.f slarf1l.f slarrv.f slartv.f slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f @@ -177,7 +177,7 @@ set(CLASRC claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f claqz0.f claqz1.f claqz2.f claqz3.f claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f - clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f + clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f clarft_lvl2.f clarf1f.f clarf1l.f clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f90 @@ -262,8 +262,8 @@ set(DLASRC dlaqgb.f dlaqge.f dlaqp2.f dlaqp2rk.f dlaqp3rk.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f - dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f - dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f + dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarft_lvl2.f + dlarfx.f dlarfy.f dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f @@ -372,7 +372,7 @@ set(ZLASRC zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f - zlarfg.f zlarfgp.f zlarft.f zlarf1f.f zlarf1l.f + zlarfg.f zlarfgp.f zlarft.f zlarft_lvl2.f zlarf1f.f zlarf1l.f zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f zlassq.f90 zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f @@ -575,8 +575,8 @@ set(SLASRC slaqgb.c slaqge.c slaqp2.c slaqp2rk.c slaqp3rk.c slaqps.c slaqsb.c slaqsp.c slaqsy.c slaqr0.c slaqr1.c slaqr2.c slaqr3.c slaqr4.c slaqr5.c slaqtr.c slar1v.c slar2v.c ilaslr.c ilaslc.c - slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarfx.c slarfy.c slargv.c - slarf1f.c slarf1l.c slarrv.c slartv.c + slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarft_lvl2.c slarfx.c + slarfy.c slargv.c slarf1f.c slarf1l.c slarrv.c slartv.c slarz.c slarzb.c slarzt.c slasy2.c slasyf.c slasyf_rook.c slasyf_rk.c slasyf_aa.c slatbs.c slatdf.c slatps.c slatrd.c slatrs.c slatrz.c @@ -682,7 +682,7 @@ set(CLASRC claqr0.c claqr1.c claqr2.c claqr3.c claqr4.c claqr5.c claqsp.c claqsy.c clar1v.c clar2v.c ilaclr.c ilaclc.c clarf.c clarfb.c clarfb_gett.c clarfg.c clarfgp.c clarft.c - clarf1f.c clarf1l.c + clarft_lvl2.c clarf1f.c clarf1l.c clarfx.c clarfy.c clargv.c clarnv.c clarrv.c clartg.c clartv.c clarz.c clarzb.c clarzt.c clascl.c claset.c clasr.c classq.c clasyf.c clasyf_rook.c clasyf_rk.c clasyf_aa.c @@ -765,8 +765,8 @@ set(DLASRC dlaqgb.c dlaqge.c dlaqp2.c dlaqp2rk.c dlaqp3rk.c dlaqps.c dlaqsb.c dlaqsp.c dlaqsy.c dlaqr0.c dlaqr1.c dlaqr2.c dlaqr3.c dlaqr4.c dlaqr5.c dlaqtr.c dlar1v.c dlar2v.c iladlr.c iladlc.c - dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarfx.c dlarfy.c - dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c + dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarft_lvl2.c + dlarfx.c dlarfy.c dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c dlarz.c dlarzb.c dlarzt.c dlasy2.c dlasyf.c dlasyf_rook.c dlasyf_rk.c dlasyf_aa.c dlatbs.c dlatdf.c dlatps.c dlatrd.c dlatrs.c dlatrz.c @@ -874,7 +874,7 @@ set(ZLASRC zlaqr0.c zlaqr1.c zlaqr2.c zlaqr3.c zlaqr4.c zlaqr5.c zlaqsp.c zlaqsy.c zlar1v.c zlar2v.c ilazlr.c ilazlc.c zlarcm.c zlarf.c zlarfb.c zlarfb_gett.c zlarf1f.c zlarf1l.c - zlarfg.c zlarfgp.c zlarft.c + zlarfg.c zlarfgp.c zlarft.c zlarft_lvl2.c zlarfx.c zlarfy.c zlargv.c zlarnv.c zlarrv.c zlartg.c zlartv.c zlarz.c zlarzb.c zlarzt.c zlascl.c zlaset.c zlasr.c zlassq.c zlasyf.c zlasyf_rook.c zlasyf_rk.c zlasyf_aa.c diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index ebf3431a92..2200a4628d 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -154,7 +154,7 @@ SLASRC_O = \ slaqgb.o slaqge.o slaqp2.o slaqp2rk.o slaqp3rk.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \ slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \ slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \ - slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ + slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarft_lvl2.o slarfx.o slarfy.o slargv.o \ slarf1f.o slarf1l.o slarrv.o slartv.o \ slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \ slasyf_rk.o \ @@ -270,7 +270,7 @@ CLASRC_O = \ claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \ claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \ claqz0.o claqz1.o claqz2.o claqz3.o \ - clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \ + clarf.o clarfb.o clarfb_gett.o clarfg.o clarft.o clarft_lvl2.o clarfgp.o \ clarf1f.o clarf1l.o \ clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ @@ -364,7 +364,7 @@ DLASRC_O = \ dlaqgb.o dlaqge.o dlaqp2.o dlaqp2rk.o dlaqp3rk.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ - dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ + dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarft_lvl2.o dlarfx.o dlarfy.o \ dlarf1f.o dlarf1l.o dlargv.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \ @@ -479,7 +479,7 @@ ZLASRC_O = \ zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \ zlaqz0.o zlaqz1.o zlaqz2.o zlaqz3.o \ zlarcm.o zlarf.o zlarfb.o zlarfb_gett.o \ - zlarfg.o zlarft.o zlarfgp.o zlarf1f.o zlarf1l.o \ + zlarfg.o zlarft.o zlarft_lvl2.o zlarfgp.o zlarf1f.o zlarf1l.o \ zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o zlasyf_aa.o \ diff --git a/lapack-netlib/SRC/clarft.c b/lapack-netlib/SRC/clarft.c index f44cb810e7..e8e518c5ca 100644 --- a/lapack-netlib/SRC/clarft.c +++ b/lapack-netlib/SRC/clarft.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -525,12 +524,13 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ */ - - /* Table of constant values */ static complex c_b1 = {1.f,0.f}; -static integer c__1 = 1; +static complex c_b3 = {-1.f,0.f}; +static integer c__3 = 3; +static integer c_n1 = -1; +static integer c__2 = 2; /* > \brief \b CLARFT forms the triangular factor T of a block reflector H = I - vtvH */ @@ -539,7 +539,6 @@ static integer c__1 = 1; /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ -/* > \htmlonly */ /* > Download CLARFT + dependencies */ /* > */ @@ -550,12 +549,11 @@ f"> */ /* > */ /* > [TXT] */ -/* > \endhtmlonly */ /* Definition: */ /* =========== */ -/* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ +/* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ /* CHARACTER DIRECT, STOREV */ /* INTEGER K, LDT, LDV, N */ @@ -663,9 +661,7 @@ f"> */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ -/* > \date December 2016 */ - -/* > \ingroup complexOTHERauxiliary */ +/* > \ingroup larft */ /* > \par Further Details: */ /* ===================== */ @@ -694,255 +690,539 @@ f"> */ /* > \endverbatim */ /* > */ /* ===================================================================== */ -/* Subroutine */ void clarft_(char *direct, char *storev, integer *n, integer * +/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer * k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt) { /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1={0.,0.}, q__2={0.,0.}, q__3={0.,0.}; + address a__1[2]; + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2[2], i__3, i__4; + complex q__1; + char ch__1[2]; /* Local variables */ - integer i__, j; - extern /* Subroutine */ void cgemm_(char *, char *, integer *, integer *, + integer i__, j, l; + logical lq, ql, qr; + integer nx; + extern /* Subroutine */ int clarft_lvl2__(char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *); + logical dirf, colv; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *), cgemv_(char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *, complex *, complex *, integer *); + complex *, complex *, integer *); extern logical lsame_(char *, char *); - integer lastv; - extern /* Subroutine */ void ctrmv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *); - integer prevlastv; - extern /* Subroutine */ void mecago_(); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); -/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK auxiliary routine -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ -/* December 2016 */ -/* ===================================================================== */ + + + + + + + + + + +/* The general scheme used is inspired by the approach inside DGEQRT3 */ +/* which was (at the time of writing this code): */ +/* Based on the algorithm of Elmroth and Gustavson, */ +/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ + /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; - v_offset = 1 + v_dim1 * 1; + v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; - t_offset = 1 + t_dim1 * 1; + t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ - if (*n == 0) { - return; + if (*n == 0 || *k == 0) { + return 0; } - if (lsame_(direct, "F")) { - prevlastv = *n; - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - prevlastv = f2cmax(prevlastv,i__); - i__2 = i__; - if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { +/* Base case */ -/* H(i) = I */ + if (*n == 1 || *k == 1) { + i__1 = t_dim1 + 1; + t[i__1].r = tau[1].r, t[i__1].i = tau[1].i; + return 0; + } - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - t[i__3].r = 0.f, t[i__3].i = 0.f; - } - } else { - -/* general case */ - - if (lsame_(storev, "C")) { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - i__3 = lastv + i__ * v_dim1; - if (v[i__3].r != 0.f || v[i__3].i != 0.f) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - i__4 = i__; - q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i; - r_cnjg(&q__3, &v[i__ + j * v_dim1]); - q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = - q__2.r * q__3.i + q__2.i * q__3.r; - t[i__3].r = q__1.r, t[i__3].i = q__1.i; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */ - - i__2 = j - i__; - i__3 = i__ - 1; - i__4 = i__; - q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ - + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], & - c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1); - } else { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - i__3 = i__ + lastv * v_dim1; - if (v[i__3].r != 0.f || v[i__3].i != 0.f) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - i__4 = i__; - q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i; - i__5 = j + i__ * v_dim1; - q__1.r = q__2.r * v[i__5].r - q__2.i * v[i__5].i, - q__1.i = q__2.r * v[i__5].i + q__2.i * v[i__5] - .r; - t[i__3].r = q__1.r, t[i__3].i = q__1.i; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */ - - i__2 = i__ - 1; - i__3 = j - i__; - i__4 = i__; - q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; - cgemm_("N", "C", &i__2, &c__1, &i__3, &q__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], - ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt); - } +/* Determine when to cross over into the level 2 based implementation */ + +/* Writing concatenation */ + i__2[0] = 1, a__1[0] = direct; + i__2[1] = 1, a__1[1] = storev; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nx = ilaenv_(&c__3, "CLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (*k < nx) { + clarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + return 0; + } -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - if (i__ > 1) { - prevlastv = f2cmax(prevlastv,lastv); - } else { - prevlastv = lastv; - } +/* Beginning of executable statements */ + + l = *k / 2; + +/* Determine what kind of Q we need to compute */ +/* We assume that if the user doesn't provide 'F' for DIRECT, */ +/* then they meant to provide 'B' and if they don't provide */ +/* 'C' for STOREV, then they meant to provide 'R' */ + + dirf = lsame_(direct, "F"); + colv = lsame_(storev, "C"); + +/* QR happens when we have forward direction in column storage */ + + qr = dirf && colv; + +/* LQ happens when we have forward direction in row storage */ + + lq = dirf && ! colv; + +/* QL happens when we have backward direction in column storage */ + + ql = ! dirf && colv; + +/* The last case is RQ. Due to how we structured this, if the */ +/* above 3 are false, then RQ must be true, so we never store */ +/* this */ +/* RQ happens when we have backward direction in row storage */ +/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */ + + if (qr) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} 0 | */ +/* |V_{2,1} V_{2,2}| */ +/* |V_{3,1} V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\C^{l,l} unit lower triangular */ +/* V_{2,1}\in\C^{k-l,l} rectangular */ +/* V_{3,1}\in\C^{n-k,l} rectangular */ + +/* V_{2,2}\in\C^{k-l,k-l} unit lower triangular */ +/* V_{3,2}\in\C^{n-k,k-l} rectangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{l, l} upper triangular */ +/* T_{2,2}\in\C^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\C^{l, k-l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */ +/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */ + +/* Define T{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + clarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + clarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{2,1}' */ + + i__1 = l; + for (j = 1; j <= i__1; ++j) { + i__3 = *k - l; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = j + (l + i__) * t_dim1; + r_cnjg(&q__1, &v[l + i__ + j * v_dim1]); + t[i__4].r = q__1.r, t[i__4].i = q__1.i; } } - } else { - prevlastv = 1; - for (i__ = *k; i__ >= 1; --i__) { - i__1 = i__; - if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { -/* H(i) = I */ +/* T_{1,2} = T_{1,2}*V_{2,2} */ - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - t[i__2].r = 0.f, t[i__2].i = 0.f; - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - i__2 = lastv + i__ * v_dim1; - if (v[i__2].r != 0.f || v[i__2].i != 0.f) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - i__3 = i__; - q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i; - r_cnjg(&q__3, &v[*n - *k + i__ + j * v_dim1]); - q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, - q__1.i = q__2.r * q__3.i + q__2.i * - q__3.r; - t[i__2].r = q__1.r, t[i__2].i = q__1.i; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */ - - i__1 = *n - *k + i__ - j; - i__2 = *k - i__; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ - j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * - v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * - t_dim1], &c__1); - } else { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - i__2 = i__ + lastv * v_dim1; - if (v[i__2].r != 0.f || v[i__2].i != 0.f) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - i__3 = i__; - q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i; - i__4 = j + (*n - *k + i__) * v_dim1; - q__1.r = q__2.r * v[i__4].r - q__2.i * v[i__4].i, - q__1.i = q__2.r * v[i__4].i + q__2.i * v[ - i__4].r; - t[i__2].r = q__1.r, t[i__2].i = q__1.i; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */ - - i__1 = *k - i__; - i__2 = *n - *k + i__ - j; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cgemm_("N", "C", &i__1, &c__1, &i__2, &q__1, &v[i__ + - 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], - ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt); - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - if (i__ > 1) { - prevlastv = f2cmin(prevlastv,lastv); - } else { - prevlastv = lastv; - } - } - i__1 = i__ + i__ * t_dim1; - i__2 = i__; - t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + i__1 = *k - l; + ctrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__1, &c_b1, &v[ + l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + cgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[*k + + 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b1, & + t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1'*V_2 */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__1 = *k - l; + ctrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__1 = *k - l; + ctrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1, + &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], + ldt); + } else if (lq) { + +/* Break V apart into 6 components */ + +/* V = |----------------------| */ +/* |V_{1,1} V_{1,2} V{1,3}| */ +/* |0 V_{2,2} V{2,3}| */ +/* |----------------------| */ + +/* V_{1,1}\in\C^{l,l} unit upper triangular */ +/* V_{1,2}\in\C^{l,k-l} rectangular */ +/* V_{1,3}\in\C^{l,n-k} rectangular */ + +/* V_{2,2}\in\C^{k-l,k-l} unit upper triangular */ +/* V_{2,3}\in\C^{k-l,n-k} rectangular */ + +/* Where l = floor(k/2) */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{l, l} upper triangular */ +/* T_{2,2}\in\C^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\C^{l, k-l} rectangular */ + +/* Then, consider the product: */ + +/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */ +/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */ + +/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + clarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + clarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{1,2} */ + + i__1 = *k - l; + clacpy_("All", &l, &i__1, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * + t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*V_{2,2}' */ + + i__1 = *k - l; + ctrmm_("Right", "Upper", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[l + + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + cgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[(*k + + 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, & + c_b1, &t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1*V_2' */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__1 = *k - l; + ctrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__1 = *k - l; + ctrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1, + &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], + ldt); + } else if (ql) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} V_{1,2}| */ +/* |V_{2,1} V_{2,2}| */ +/* |0 V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\C^{n-k,k-l} rectangular */ +/* V_{2,1}\in\C^{k-l,k-l} unit upper triangular */ + +/* V_{1,2}\in\C^{n-k,l} rectangular */ +/* V_{2,2}\in\C^{k-l,l} rectangular */ +/* V_{3,2}\in\C^{l,l} unit upper triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\C^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\C^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */ +/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */ + +/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + clarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + clarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, & + tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2}' */ + + i__1 = *k - l; + for (j = 1; j <= i__1; ++j) { + i__3 = l; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = *k - l + i__ + j * t_dim1; + r_cnjg(&q__1, &v[*n - *k + j + (*k - l + i__) * v_dim1]); + t[i__4].r = q__1.r, t[i__4].i = q__1.i; } } - } - return; -/* End of CLARFT */ +/* T_{2,1} = T_{2,1}*V_{2,1} */ + + i__1 = *k - l; + ctrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__1, &c_b1, &v[ + *n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt); +/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + cgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[(*k - + l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - + l + 1 + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2'*V_1 */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__1 = *k - l; + ctrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b3, + &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__1 = *k - l; + ctrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b1, + &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } else { + +/* Else means RQ case */ + +/* Break V apart into 6 components */ + +/* V = |-----------------------| */ +/* |V_{1,1} V_{1,2} 0 | */ +/* |V_{2,1} V_{2,2} V_{2,3}| */ +/* |-----------------------| */ + +/* V_{1,1}\in\C^{k-l,n-k} rectangular */ +/* V_{1,2}\in\C^{k-l,k-l} unit lower triangular */ + +/* V_{2,1}\in\C^{l,n-k} rectangular */ +/* V_{2,2}\in\C^{l,k-l} rectangular */ +/* V_{2,3}\in\C^{l,l} unit lower triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\C^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\C^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */ +/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */ + +/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + clarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + clarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k + - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2} */ + + i__1 = *k - l; + clacpy_("All", &l, &i__1, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], + ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*V_{1,2}' */ + + i__1 = *k - l; + ctrmm_("Right", "Lower", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[(* + n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + cgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[*k - + l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - l + 1 + + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2*V_1' */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__1 = *k - l; + ctrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b3, & + t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__1 = *k - l; + ctrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b1, + &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } + return 0; } /* clarft_ */ diff --git a/lapack-netlib/SRC/clarft.f b/lapack-netlib/SRC/clarft.f index de8b97bf9c..3bf2448ba8 100644 --- a/lapack-netlib/SRC/clarft.f +++ b/lapack-netlib/SRC/clarft.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download CLARFT + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -161,6 +159,7 @@ * ===================================================================== RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, $ TAU, T, LDT ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -179,11 +178,12 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * .. Parameters .. * COMPLEX ONE, NEG_ONE, ZERO - PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0) + PARAMETER(ONE=(1.0E+0,0.0E+0), ZERO = (0.0E+0,0.0E+0), + $ NEG_ONE=(-1.0E+0,0.0E+0)) * * .. Local Scalars .. * - INTEGER I,J,L + INTEGER I,J,L,NX LOGICAL QR,LQ,QL,DIRF,COLV * * .. External Subroutines .. @@ -193,7 +193,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * .. External Functions.. * LOGICAL LSAME - EXTERNAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV * * .. Intrinsic Functions.. * @@ -219,6 +220,14 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, RETURN END IF * +* Determine when to cross over into the level 2 based implementation +* + NX = ILAENV(3, "CLARFT", DIRECT // STOREV, N, K, -1, -1) + IF(K.LT.NX) THEN + CALL CLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT) + RETURN + END IF +* * Beginning of executable statements * L = K / 2 diff --git a/lapack-netlib/SRC/clarft_lvl2.c b/lapack-netlib/SRC/clarft_lvl2.c new file mode 100644 index 0000000000..2ca5490ee0 --- /dev/null +++ b/lapack-netlib/SRC/clarft_lvl2.c @@ -0,0 +1,935 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef blasint logical; + +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);} +#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + + +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} + +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download CLARFT_LVL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */ +/* T, LDT ) */ + +/* CHARACTER DIRECT, STOREV */ +/* INTEGER K, LDT, LDV, N */ +/* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CLARFT_LVL2 forms the triangular factor T of a complex block reflector H */ +/* > of order n, which is defined as a product of k elementary reflectors. */ +/* > */ +/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ +/* > */ +/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ +/* > */ +/* > If STOREV = 'C', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th column of the array V, and */ +/* > */ +/* > H = I - V * T * V**H */ +/* > */ +/* > If STOREV = 'R', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th row of the array V, and */ +/* > */ +/* > H = I - V**H * T * V */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies the order in which the elementary reflectors are */ +/* > multiplied to form the block reflector: */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Specifies how the vectors which define the elementary */ +/* > reflectors are stored (see also Further Details): */ +/* > = 'C': columnwise */ +/* > = 'R': rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the block reflector H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the triangular factor T (= the number of */ +/* > elementary reflectors). K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,N) if STOREV = 'R' */ +/* > The matrix V. See further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is COMPLEX array, dimension (LDT,K) */ +/* > The k by k triangular factor T of the block reflector. */ +/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* > lower triangular. The rest of the array is not used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larft */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int clarft_lvl2__(char *direct, char *storev, integer *n, + integer *k, complex *v, integer *ldv, complex *tau, complex *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Local variables */ + integer i__, j, prevlastv; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cgemv_(char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); + integer lastv; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), mecago_(); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = f2cmax(prevlastv,i__); + i__2 = i__; + if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0.f, t[i__3].i = 0.f; + } + } else { + +/* general case */ + + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0.f || v[i__3].i != 0.f) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + i__4 = i__; + q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i; + r_cnjg(&q__3, &v[i__ + j * v_dim1]); + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + t[i__3].r = q__1.r, t[i__3].i = q__1.i; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */ + + i__2 = j - i__; + i__3 = i__ - 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], & + c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0.f || v[i__3].i != 0.f) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + i__4 = i__; + q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i; + i__5 = j + i__ * v_dim1; + q__1.r = q__2.r * v[i__5].r - q__2.i * v[i__5].i, + q__1.i = q__2.r * v[i__5].i + q__2.i * v[i__5] + .r; + t[i__3].r = q__1.r, t[i__3].i = q__1.i; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */ + + i__2 = i__ - 1; + i__3 = j - i__; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemm_("N", "C", &i__2, &c__1, &i__3, &q__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], + ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt); + } + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = f2cmax(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0.f, t[i__2].i = 0.f; + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0.f || v[i__2].i != 0.f) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + i__3 = i__; + q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i; + r_cnjg(&q__3, &v[*n - *k + i__ + j * v_dim1]); + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, + q__1.i = q__2.r * q__3.i + q__2.i * + q__3.r; + t[i__2].r = q__1.r, t[i__2].i = q__1.i; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j; + i__2 = *k - i__; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * + t_dim1], &c__1); + } else { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0.f || v[i__2].i != 0.f) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + i__3 = i__; + q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i; + i__4 = j + (*n - *k + i__) * v_dim1; + q__1.r = q__2.r * v[i__4].r - q__2.i * v[i__4].i, + q__1.i = q__2.r * v[i__4].i + q__2.i * v[ + i__4].r; + t[i__2].r = q__1.r, t[i__2].i = q__1.i; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemm_("N", "C", &i__1, &c__1, &i__2, &q__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt); + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = f2cmin(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } + } + } + return 0; + +/* End of CLARFT_LVL2 */ + +} /* clarft_lvl2__ */ + diff --git a/lapack-netlib/SRC/clarft_lvl2.f b/lapack-netlib/SRC/clarft_lvl2.f new file mode 100644 index 0000000000..3b0aea1132 --- /dev/null +++ b/lapack-netlib/SRC/clarft_lvl2.f @@ -0,0 +1,328 @@ +*> \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download CLARFT_LVL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, +* T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFT_LVL2 forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, + $ T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL CGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, + $ ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, + $ -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of CLARFT_LVL2 +* + END diff --git a/lapack-netlib/SRC/dlarft.c b/lapack-netlib/SRC/dlarft.c index 65ce8807fc..9a968c286f 100644 --- a/lapack-netlib/SRC/dlarft.c +++ b/lapack-netlib/SRC/dlarft.c @@ -190,8 +190,8 @@ typedef struct Namelist Namelist; #define c_abs(z) (cabsf(Cf(z))) #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } #ifdef _MSC_VER -#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} -#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);} +#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);} +#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);} #else #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -378,19 +377,24 @@ static integer smaxloc_(float *w, integer s, integer e, integer *n) if (w[i-1]>m) mi=i ,m=w[i-1]; return mi-s+1; } + static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { integer n = *n_, incx = *incx_, incy = *incy_, i; #ifdef _MSC_VER _Fcomplex zdotc = {0.0, 0.0}; if (incx == 1 && incy == 1) { for (i=0;i \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH */ @@ -522,7 +539,6 @@ static doublereal c_b6 = 1.; /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ -/* > \htmlonly */ /* > Download DLARFT + dependencies */ /* > */ @@ -533,12 +549,11 @@ f"> */ /* > */ /* > [TXT] */ -/* > \endhtmlonly */ /* Definition: */ /* =========== */ -/* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ +/* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ /* CHARACTER DIRECT, STOREV */ /* INTEGER K, LDT, LDV, N */ @@ -646,9 +661,7 @@ f"> */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ -/* > \date December 2016 */ - -/* > \ingroup doubleOTHERauxiliary */ +/* > \ingroup larft */ /* > \par Further Details: */ /* ===================== */ @@ -677,212 +690,536 @@ f"> */ /* > \endverbatim */ /* > */ /* ===================================================================== */ -/* Subroutine */ void dlarft_(char *direct, char *storev, integer *n, integer * +/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt) { /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - doublereal d__1; + address a__1[2]; + integer t_dim1, t_offset, v_dim1, v_offset, i__1[2], i__2, i__3; + char ch__1[2]; /* Local variables */ - integer i__, j; + integer i__, j, l; + logical lq, ql, qr; + integer nx; + extern /* Subroutine */ int dlarft_lvl2__(char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + logical dirf, colv; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ void dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - integer lastv; - extern /* Subroutine */ void dtrmv_(char *, char *, char *, integer *, - doublereal *, integer *, doublereal *, integer *); - integer prevlastv; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); -/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK auxiliary routine -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ -/* December 2016 */ -/* ===================================================================== */ + + + + + + + + +/* The general scheme used is inspired by the approach inside DGEQRT3 */ +/* which was (at the time of writing this code): */ +/* Based on the algorithm of Elmroth and Gustavson, */ +/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ + /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; - v_offset = 1 + v_dim1 * 1; + v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; - t_offset = 1 + t_dim1 * 1; + t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ - if (*n == 0) { - return; + if (*n == 0 || *k == 0) { + return 0; } - if (lsame_(direct, "F")) { - prevlastv = *n; - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - prevlastv = f2cmax(i__,prevlastv); - if (tau[i__] == 0.) { +/* Base case */ -/* H(i) = I */ + if (*n == 1 || *k == 1) { + t[t_dim1 + 1] = tau[1]; + return 0; + } - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.; - } - } else { - -/* general case */ - - if (lsame_(storev, "C")) { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - if (v[lastv + i__ * v_dim1] != 0.) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1]; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */ - - i__2 = j - i__; - i__3 = i__ - 1; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 + - v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, & - c_b6, &t[i__ * t_dim1 + 1], &c__1); - } else { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - if (v[i__ + lastv * v_dim1] != 0.) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1]; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */ - - i__2 = i__ - 1; - i__3 = j - i__; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) * - v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], - ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1); - } +/* Determine when to cross over into the level 2 based implementation */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = direct; + i__1[1] = 1, a__1[1] = storev; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nx = ilaenv_(&c__3, "DLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (*k < nx) { + dlarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + return 0; + } -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ +/* Beginning of executable statements */ - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - t[i__ + i__ * t_dim1] = tau[i__]; - if (i__ > 1) { - prevlastv = f2cmax(prevlastv,lastv); - } else { - prevlastv = lastv; - } + l = *k / 2; + +/* Determine what kind of Q we need to compute */ +/* We assume that if the user doesn't provide 'F' for DIRECT, */ +/* then they meant to provide 'B' and if they don't provide */ +/* 'C' for STOREV, then they meant to provide 'R' */ + + dirf = lsame_(direct, "F"); + colv = lsame_(storev, "C"); + +/* QR happens when we have forward direction in column storage */ + + qr = dirf && colv; + +/* LQ happens when we have forward direction in row storage */ + + lq = dirf && ! colv; + +/* QL happens when we have backward direction in column storage */ + + ql = ! dirf && colv; + +/* The last case is RQ. Due to how we structured this, if the */ +/* above 3 are false, then RQ must be true, so we never store */ +/* this */ +/* RQ happens when we have backward direction in row storage */ +/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */ + + if (qr) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} 0 | */ +/* |V_{2,1} V_{2,2}| */ +/* |V_{3,1} V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\R^{l,l} unit lower triangular */ +/* V_{2,1}\in\R^{k-l,l} rectangular */ +/* V_{3,1}\in\R^{n-k,l} rectangular */ + +/* V_{2,2}\in\R^{k-l,k-l} unit lower triangular */ +/* V_{3,2}\in\R^{n-k,k-l} rectangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{l, l} upper triangular */ +/* T_{2,2}\in\R^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\R^{l, k-l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */ +/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */ + +/* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + dlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + dlarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{2,1}' */ + + i__2 = l; + for (j = 1; j <= i__2; ++j) { + i__3 = *k - l; + for (i__ = 1; i__ <= i__3; ++i__) { + t[j + (l + i__) * t_dim1] = v[l + i__ + j * v_dim1]; } } - } else { - prevlastv = 1; - for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.) { -/* H(i) = I */ +/* T_{1,2} = T_{1,2}*V_{2,2} */ - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.; - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - if (v[lastv + i__ * v_dim1] != 0.) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ - + j * v_dim1]; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */ - - i__1 = *n - *k + i__ - j; - i__2 = *k - i__; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ - + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & - c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], & - c__1); - } else { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - if (v[i__ + lastv * v_dim1] != 0.) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k - + i__) * v_dim1]; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */ - - i__1 = *k - i__; - i__2 = *n - *k + i__ - j; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + - 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], - ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1); - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - if (i__ > 1) { - prevlastv = f2cmin(prevlastv,lastv); - } else { - prevlastv = lastv; - } - } - t[i__ + i__ * t_dim1] = tau[i__]; + i__2 = *k - l; + dtrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__2, &c_b13, & + v[l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], + ldt); + +/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__2 = *k - l; + i__3 = *n - *k; + dgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[*k + + 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b13, + &t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1'*V_2 */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__2 = *k - l; + dtrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__2 = *k - l; + dtrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, & + c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + + 1], ldt); + } else if (lq) { + +/* Break V apart into 6 components */ + +/* V = |----------------------| */ +/* |V_{1,1} V_{1,2} V{1,3}| */ +/* |0 V_{2,2} V{2,3}| */ +/* |----------------------| */ + +/* V_{1,1}\in\R^{l,l} unit upper triangular */ +/* V_{1,2}\in\R^{l,k-l} rectangular */ +/* V_{1,3}\in\R^{l,n-k} rectangular */ + +/* V_{2,2}\in\R^{k-l,k-l} unit upper triangular */ +/* V_{2,3}\in\R^{k-l,n-k} rectangular */ + +/* Where l = floor(k/2) */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{l, l} upper triangular */ +/* T_{2,2}\in\R^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\R^{l, k-l} rectangular */ + +/* Then, consider the product: */ + +/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */ +/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */ + +/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + dlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + dlarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{1,2} */ + + i__2 = *k - l; + dlacpy_("All", &l, &i__2, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * + t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*V_{2,2}' */ + + i__2 = *k - l; + dtrmm_("Right", "Upper", "Transpose", "Unit", &l, &i__2, &c_b13, &v[l + + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__2 = *k - l; + i__3 = *n - *k; + dgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[(*k + + 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, & + c_b13, &t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1*V_2' */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__2 = *k - l; + dtrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__2 = *k - l; + dtrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, & + c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + + 1], ldt); + } else if (ql) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} V_{1,2}| */ +/* |V_{2,1} V_{2,2}| */ +/* |0 V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\R^{n-k,k-l} rectangular */ +/* V_{2,1}\in\R^{k-l,k-l} unit upper triangular */ + +/* V_{1,2}\in\R^{n-k,l} rectangular */ +/* V_{2,2}\in\R^{k-l,l} rectangular */ +/* V_{3,2}\in\R^{l,l} unit upper triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\R^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\R^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */ +/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */ + +/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + dlarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + dlarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, & + tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2}' */ + + i__2 = *k - l; + for (j = 1; j <= i__2; ++j) { + i__3 = l; + for (i__ = 1; i__ <= i__3; ++i__) { + t[*k - l + i__ + j * t_dim1] = v[*n - *k + j + (*k - l + i__) + * v_dim1]; } } - } - return; -/* End of DLARFT */ +/* T_{2,1} = T_{2,1}*V_{2,1} */ + + i__2 = *k - l; + dtrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__2, &c_b13, & + v[*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + i__2 = *k - l; + i__3 = *n - *k; + dgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[(*k + - l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b13, &t[*k + - l + 1 + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2'*V_1 */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__2 = *k - l; + dtrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__2, &c_b22, + &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__2 = *k - l; + dtrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__2, & + c_b13, &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } else { + +/* Else means RQ case */ + +/* Break V apart into 6 components */ + +/* V = |-----------------------| */ +/* |V_{1,1} V_{1,2} 0 | */ +/* |V_{2,1} V_{2,2} V_{2,3}| */ +/* |-----------------------| */ + +/* V_{1,1}\in\R^{k-l,n-k} rectangular */ +/* V_{1,2}\in\R^{k-l,k-l} unit lower triangular */ + +/* V_{2,1}\in\R^{l,n-k} rectangular */ +/* V_{2,2}\in\R^{l,k-l} rectangular */ +/* V_{2,3}\in\R^{l,l} unit lower triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\R^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\R^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */ +/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */ + +/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + dlarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + dlarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k + - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2} */ + + i__2 = *k - l; + dlacpy_("All", &l, &i__2, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], + ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*V_{1,2}' */ + + i__2 = *k - l; + dtrmm_("Right", "Lower", "Transpose", "Unit", &l, &i__2, &c_b13, &v[(* + n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__2 = *k - l; + i__3 = *n - *k; + dgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[*k - + l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b13, &t[*k - l + + 1 + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2*V_1' */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__2 = *k - l; + dtrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b22, + &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__2 = *k - l; + dtrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b13, + &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } + return 0; } /* dlarft_ */ diff --git a/lapack-netlib/SRC/dlarft.f b/lapack-netlib/SRC/dlarft.f index c27bb1a806..fa2ea0d134 100644 --- a/lapack-netlib/SRC/dlarft.f +++ b/lapack-netlib/SRC/dlarft.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download DLARFT + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -161,6 +159,7 @@ * ===================================================================== RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, $ TAU, T, LDT ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -183,7 +182,7 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * * .. Local Scalars .. * - INTEGER I,J,L + INTEGER I,J,L,NX LOGICAL QR,LQ,QL,DIRF,COLV * * .. External Subroutines .. @@ -193,7 +192,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * .. External Functions.. * LOGICAL LSAME - EXTERNAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV * * The general scheme used is inspired by the approach inside DGEQRT3 * which was (at the time of writing this code): @@ -215,6 +215,14 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, RETURN END IF * +* Determine when to cross over into the level 2 based implementation +* + NX = ILAENV(3, "DLARFT", DIRECT // STOREV, N, K, -1, -1) + IF(K.LT.NX) THEN + CALL DLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT) + RETURN + END IF +* * Beginning of executable statements * L = K / 2 diff --git a/lapack-netlib/SRC/dlarft_lvl2.c b/lapack-netlib/SRC/dlarft_lvl2.c new file mode 100644 index 0000000000..6bd748c436 --- /dev/null +++ b/lapack-netlib/SRC/dlarft_lvl2.c @@ -0,0 +1,895 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef blasint logical; + +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);} +#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + + +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} + +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif + +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} + +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download DLARFT_LVL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */ +/* T, LDT ) */ + +/* CHARACTER DIRECT, STOREV */ +/* INTEGER K, LDT, LDV, N */ +/* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARFT_LVL2 forms the triangular factor T of a real block reflector H */ +/* > of order n, which is defined as a product of k elementary reflectors. */ +/* > */ +/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ +/* > */ +/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ +/* > */ +/* > If STOREV = 'C', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th column of the array V, and */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > If STOREV = 'R', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th row of the array V, and */ +/* > */ +/* > H = I - V**T * T * V */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies the order in which the elementary reflectors are */ +/* > multiplied to form the block reflector: */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Specifies how the vectors which define the elementary */ +/* > reflectors are stored (see also Further Details): */ +/* > = 'C': columnwise */ +/* > = 'R': rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the block reflector H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the triangular factor T (= the number of */ +/* > elementary reflectors). K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,N) if STOREV = 'R' */ +/* > The matrix V. See further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,K) */ +/* > The k by k triangular factor T of the block reflector. */ +/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* > lower triangular. The rest of the array is not used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larft */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlarft_lvl2__(char *direct, char *storev, integer *n, + integer *k, doublereal *v, integer *ldv, doublereal *tau, doublereal * + t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, prevlastv; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + integer lastv; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *), mecago_(); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = f2cmax(i__,prevlastv); + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; + } + } else { + +/* general case */ + + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1]; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */ + + i__2 = j - i__; + i__3 = i__ - 1; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 + + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, & + c_b6, &t[i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1]; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */ + + i__2 = i__ - 1; + i__3 = j - i__; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) * + v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], + ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1); + } + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = f2cmax(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ + + j * v_dim1]; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j; + i__2 = *k - i__; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], & + c__1); + } else { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k + + i__) * v_dim1]; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1); + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = f2cmin(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } + } + } + return 0; + +/* End of DLARFT_LVL2 */ + +} /* dlarft_lvl2__ */ + diff --git a/lapack-netlib/SRC/dlarft_lvl2.f b/lapack-netlib/SRC/dlarft_lvl2.f new file mode 100644 index 0000000000..9614df466d --- /dev/null +++ b/lapack-netlib/SRC/dlarft_lvl2.f @@ -0,0 +1,326 @@ +*> \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download DLARFT_LVL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, +* T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT_LVL2 forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, + $ T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, + $ -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT_LVL2 +* + END diff --git a/lapack-netlib/SRC/ilaenv.f b/lapack-netlib/SRC/ilaenv.f index e74a2b35ec..e108108d7f 100644 --- a/lapack-netlib/SRC/ilaenv.f +++ b/lapack-netlib/SRC/ilaenv.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ILAENV + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -159,6 +157,7 @@ *> * ===================================================================== INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -667,6 +666,10 @@ INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) IF( C3.EQ.'HD3' ) THEN NX = 128 END IF + ELSE IF( C2.EQ.'LA' ) THEN + IF( C3.EQ.'RFT' ) THEN + NX = 64 + END IF END IF ILAENV = NX RETURN diff --git a/lapack-netlib/SRC/slarft.c b/lapack-netlib/SRC/slarft.c index 6f8350b218..34445172b1 100644 --- a/lapack-netlib/SRC/slarft.c +++ b/lapack-netlib/SRC/slarft.c @@ -190,8 +190,8 @@ typedef struct Namelist Namelist; #define c_abs(z) (cabsf(Cf(z))) #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } #ifdef _MSC_VER -#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} -#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);} +#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);} +#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);} #else #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -378,19 +377,24 @@ static integer smaxloc_(float *w, integer s, integer e, integer *n) if (w[i-1]>m) mi=i ,m=w[i-1]; return mi-s+1; } + static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { integer n = *n_, incx = *incx_, incy = *incy_, i; #ifdef _MSC_VER _Fcomplex zdotc = {0.0, 0.0}; if (incx == 1 && incy == 1) { for (i=0;i \brief \b SLARFT forms the triangular factor T of a block reflector H = I - vtvH */ @@ -522,7 +539,6 @@ static real c_b6 = 1.f; /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ -/* > \htmlonly */ /* > Download SLARFT + dependencies */ /* > */ @@ -533,12 +549,11 @@ f"> */ /* > */ /* > [TXT] */ -/* > \endhtmlonly */ /* Definition: */ /* =========== */ -/* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ +/* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ /* CHARACTER DIRECT, STOREV */ /* INTEGER K, LDT, LDV, N */ @@ -643,12 +658,10 @@ f"> */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ -/* > \author Univ. of Colorado Denver */ +/* > \author Johnathan Rhyne, Univ. of Colorado Denver (original author, 2024) */ /* > \author NAG Ltd. */ -/* > \date December 2016 */ - -/* > \ingroup realOTHERauxiliary */ +/* > \ingroup larft */ /* > \par Further Details: */ /* ===================== */ @@ -677,211 +690,533 @@ f"> */ /* > \endverbatim */ /* > */ /* ===================================================================== */ -/* Subroutine */ void slarft_(char *direct, char *storev, integer *n, integer * +/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer * k, real *v, integer *ldv, real *tau, real *t, integer *ldt) { /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - real r__1; + address a__1[2]; + integer t_dim1, t_offset, v_dim1, v_offset, i__1[2], i__2, i__3; + char ch__1[2]; /* Local variables */ - integer i__, j; + integer i__, j, l; + logical lq, ql, qr; + integer nx; + extern /* Subroutine */ int slarft_lvl2__(char *, char *, integer *, + integer *, real *, integer *, real *, real *, integer *); + logical dirf, colv; extern logical lsame_(char *, char *); - extern /* Subroutine */ void sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - integer lastv; - extern /* Subroutine */ void strmv_(char *, char *, char *, integer *, - real *, integer *, real *, integer *); - integer prevlastv; - extern /* Subroutine */ void mecago_(); - - -/* -- LAPACK auxiliary routine (version 3.7.0) -- */ + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strmm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + + +/* -- LAPACK auxiliary routine -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ -/* December 2016 */ -/* ===================================================================== */ + + + + + + + + +/* The general scheme used is inspired by the approach inside DGEQRT3 */ +/* which was (at the time of writing this code): */ +/* Based on the algorithm of Elmroth and Gustavson, */ +/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ + /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; - v_offset = 1 + v_dim1 * 1; + v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; - t_offset = 1 + t_dim1 * 1; + t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ - if (*n == 0) { - return; + if (*n == 0 || *k == 0) { + return 0; } - if (lsame_(direct, "F")) { - prevlastv = *n; - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - prevlastv = f2cmax(i__,prevlastv); - if (tau[i__] == 0.f) { +/* Base case */ -/* H(i) = I */ + if (*n == 1 || *k == 1) { + t[t_dim1 + 1] = tau[1]; + return 0; + } - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.f; - } - } else { - -/* general case */ - - if (lsame_(storev, "C")) { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - if (v[lastv + i__ * v_dim1] != 0.f) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1]; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */ - - i__2 = j - i__; - i__3 = i__ - 1; - r__1 = -tau[i__]; - sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + 1 + - v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, & - c_b6, &t[i__ * t_dim1 + 1], &c__1); - } else { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - if (v[i__ + lastv * v_dim1] != 0.f) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1]; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */ - - i__2 = i__ - 1; - i__3 = j - i__; - r__1 = -tau[i__]; - sgemv_("No transpose", &i__2, &i__3, &r__1, &v[(i__ + 1) * - v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], - ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1); - } +/* Determine when to cross over into the level 2 based implementation */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = direct; + i__1[1] = 1, a__1[1] = storev; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nx = ilaenv_(&c__3, "SLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (*k < nx) { + slarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + return 0; + } -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ +/* Beginning of executable statements */ - i__2 = i__ - 1; - strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - t[i__ + i__ * t_dim1] = tau[i__]; - if (i__ > 1) { - prevlastv = f2cmax(prevlastv,lastv); - } else { - prevlastv = lastv; - } + l = *k / 2; + +/* Determine what kind of Q we need to compute */ +/* We assume that if the user doesn't provide 'F' for DIRECT, */ +/* then they meant to provide 'B' and if they don't provide */ +/* 'C' for STOREV, then they meant to provide 'R' */ + + dirf = lsame_(direct, "F"); + colv = lsame_(storev, "C"); + +/* QR happens when we have forward direction in column storage */ + + qr = dirf && colv; + +/* LQ happens when we have forward direction in row storage */ + + lq = dirf && ! colv; + +/* QL happens when we have backward direction in column storage */ + + ql = ! dirf && colv; + +/* The last case is RQ. Due to how we structured this, if the */ +/* above 3 are false, then RQ must be true, so we never store */ +/* this */ +/* RQ happens when we have backward direction in row storage */ +/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */ + + if (qr) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} 0 | */ +/* |V_{2,1} V_{2,2}| */ +/* |V_{3,1} V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\R^{l,l} unit lower triangular */ +/* V_{2,1}\in\R^{k-l,l} rectangular */ +/* V_{3,1}\in\R^{n-k,l} rectangular */ + +/* V_{2,2}\in\R^{k-l,k-l} unit lower triangular */ +/* V_{3,2}\in\R^{n-k,k-l} rectangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{l, l} upper triangular */ +/* T_{2,2}\in\R^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\R^{l, k-l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */ +/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */ + +/* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + slarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + slarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{2,1}' */ + + i__2 = l; + for (j = 1; j <= i__2; ++j) { + i__3 = *k - l; + for (i__ = 1; i__ <= i__3; ++i__) { + t[j + (l + i__) * t_dim1] = v[l + i__ + j * v_dim1]; } } - } else { - prevlastv = 1; - for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.f) { -/* H(i) = I */ +/* T_{1,2} = T_{1,2}*V_{2,2} */ - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.f; - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - if (v[lastv + i__ * v_dim1] != 0.f) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ - + j * v_dim1]; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */ - - i__1 = *n - *k + i__ - j; - i__2 = *k - i__; - r__1 = -tau[i__]; - sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__ - + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & - c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], & - c__1); - } else { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - if (v[i__ + lastv * v_dim1] != 0.f) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k - + i__) * v_dim1]; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */ - - i__1 = *k - i__; - i__2 = *n - *k + i__ - j; - r__1 = -tau[i__]; - sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + - 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], - ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1); - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - if (i__ > 1) { - prevlastv = f2cmin(prevlastv,lastv); - } else { - prevlastv = lastv; - } - } - t[i__ + i__ * t_dim1] = tau[i__]; + i__2 = *k - l; + strmm_("Right", "Lower", "No transpose", "Unit", &l, &i__2, &c_b13, & + v[l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], + ldt); + +/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__2 = *k - l; + i__3 = *n - *k; + sgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[*k + + 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b13, + &t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1'*V_2 */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__2 = *k - l; + strmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__2 = *k - l; + strmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, & + c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + + 1], ldt); + } else if (lq) { + +/* Break V apart into 6 components */ + +/* V = |----------------------| */ +/* |V_{1,1} V_{1,2} V{1,3}| */ +/* |0 V_{2,2} V{2,3}| */ +/* |----------------------| */ + +/* V_{1,1}\in\R^{l,l} unit upper triangular */ +/* V_{1,2}\in\R^{l,k-l} rectangular */ +/* V_{1,3}\in\R^{l,n-k} rectangular */ + +/* V_{2,2}\in\R^{k-l,k-l} unit upper triangular */ +/* V_{2,3}\in\R^{k-l,n-k} rectangular */ + +/* Where l = floor(k/2) */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{l, l} upper triangular */ +/* T_{2,2}\in\R^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\R^{l, k-l} rectangular */ + +/* Then, consider the product: */ + +/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */ +/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */ + +/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + slarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + slarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{1,2} */ + + i__2 = *k - l; + slacpy_("All", &l, &i__2, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * + t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*V_{2,2}' */ + + i__2 = *k - l; + strmm_("Right", "Upper", "Transpose", "Unit", &l, &i__2, &c_b13, &v[l + + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__2 = *k - l; + i__3 = *n - *k; + sgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[(*k + + 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, & + c_b13, &t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1*V_2' */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__2 = *k - l; + strmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__2 = *k - l; + strmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, & + c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + + 1], ldt); + } else if (ql) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} V_{1,2}| */ +/* |V_{2,1} V_{2,2}| */ +/* |0 V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\R^{n-k,k-l} rectangular */ +/* V_{2,1}\in\R^{k-l,k-l} unit upper triangular */ + +/* V_{1,2}\in\R^{n-k,l} rectangular */ +/* V_{2,2}\in\R^{k-l,l} rectangular */ +/* V_{3,2}\in\R^{l,l} unit upper triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\R^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\R^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */ +/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */ + +/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + slarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + slarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, & + tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2}' */ + + i__2 = *k - l; + for (j = 1; j <= i__2; ++j) { + i__3 = l; + for (i__ = 1; i__ <= i__3; ++i__) { + t[*k - l + i__ + j * t_dim1] = v[*n - *k + j + (*k - l + i__) + * v_dim1]; } } - } - return; -/* End of SLARFT */ +/* T_{2,1} = T_{2,1}*V_{2,1} */ + + i__2 = *k - l; + strmm_("Right", "Upper", "No transpose", "Unit", &l, &i__2, &c_b13, & + v[*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + i__2 = *k - l; + i__3 = *n - *k; + sgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[(*k + - l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b13, &t[*k + - l + 1 + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2'*V_1 */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__2 = *k - l; + strmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__2, &c_b22, + &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__2 = *k - l; + strmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__2, & + c_b13, &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } else { + +/* Else means RQ case */ + +/* Break V apart into 6 components */ + +/* V = |-----------------------| */ +/* |V_{1,1} V_{1,2} 0 | */ +/* |V_{2,1} V_{2,2} V_{2,3}| */ +/* |-----------------------| */ + +/* V_{1,1}\in\R^{k-l,n-k} rectangular */ +/* V_{1,2}\in\R^{k-l,k-l} unit lower triangular */ + +/* V_{2,1}\in\R^{l,n-k} rectangular */ +/* V_{2,2}\in\R^{l,k-l} rectangular */ +/* V_{2,3}\in\R^{l,l} unit lower triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\R^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\R^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */ +/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */ + +/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'TV */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__2 = *n - l; + i__3 = *k - l; + slarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + slarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k + - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2} */ + + i__2 = *k - l; + slacpy_("All", &l, &i__2, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], + ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*V_{1,2}' */ + + i__2 = *k - l; + strmm_("Right", "Lower", "Transpose", "Unit", &l, &i__2, &c_b13, &v[(* + n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__2 = *k - l; + i__3 = *n - *k; + sgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[*k - + l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b13, &t[*k - l + + 1 + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2*V_1' */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__2 = *k - l; + strmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b22, + &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__2 = *k - l; + strmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b13, + &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } + return 0; } /* slarft_ */ diff --git a/lapack-netlib/SRC/slarft.f b/lapack-netlib/SRC/slarft.f index ad3a4d924c..3e0eac751e 100644 --- a/lapack-netlib/SRC/slarft.f +++ b/lapack-netlib/SRC/slarft.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download SLARFT + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -161,6 +159,7 @@ * ===================================================================== RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, $ TAU, T, LDT ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -183,7 +182,7 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, * * .. Local Scalars .. * - INTEGER I,J,L + INTEGER I,J,L,NX LOGICAL QR,LQ,QL,DIRF,COLV * * .. External Subroutines .. @@ -193,7 +192,8 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, * .. External Functions.. * LOGICAL LSAME - EXTERNAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV * * The general scheme used is inspired by the approach inside DGEQRT3 * which was (at the time of writing this code): @@ -215,6 +215,14 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, RETURN END IF * +* Determine when to cross over into the level 2 based implementation +* + NX = ILAENV(3, "SLARFT", DIRECT // STOREV, N, K, -1, -1) + IF(K.LT.NX) THEN + CALL SLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT) + RETURN + END IF +* * Beginning of executable statements * L = K / 2 diff --git a/lapack-netlib/SRC/slarft_lvl2.c b/lapack-netlib/SRC/slarft_lvl2.c new file mode 100644 index 0000000000..d1b8f31b65 --- /dev/null +++ b/lapack-netlib/SRC/slarft_lvl2.c @@ -0,0 +1,896 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef blasint logical; + +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);} +#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + + +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} + +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download SLARFT_LVL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */ +/* T, LDT ) */ + +/* CHARACTER DIRECT, STOREV */ +/* INTEGER K, LDT, LDV, N */ +/* REAL T( LDT, * ), TAU( * ), V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLARFT_LVL2 forms the triangular factor T of a real block reflector H */ +/* > of order n, which is defined as a product of k elementary reflectors. */ +/* > */ +/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ +/* > */ +/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ +/* > */ +/* > If STOREV = 'C', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th column of the array V, and */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > If STOREV = 'R', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th row of the array V, and */ +/* > */ +/* > H = I - V**T * T * V */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies the order in which the elementary reflectors are */ +/* > multiplied to form the block reflector: */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Specifies how the vectors which define the elementary */ +/* > reflectors are stored (see also Further Details): */ +/* > = 'C': columnwise */ +/* > = 'R': rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the block reflector H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the triangular factor T (= the number of */ +/* > elementary reflectors). K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is REAL array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,N) if STOREV = 'R' */ +/* > The matrix V. See further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,K) */ +/* > The k by k triangular factor T of the block reflector. */ +/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* > lower triangular. The rest of the array is not used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larft */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int slarft_lvl2__(char *direct, char *storev, integer *n, + integer *k, real *v, integer *ldv, real *tau, real *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + integer i__, j, prevlastv; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer lastv; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *), + mecago_(); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = f2cmax(i__,prevlastv); + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.f; + } + } else { + +/* general case */ + + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.f) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1]; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */ + + i__2 = j - i__; + i__3 = i__ - 1; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + 1 + + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, & + c_b6, &t[i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.f) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1]; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */ + + i__2 = i__ - 1; + i__3 = j - i__; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__2, &i__3, &r__1, &v[(i__ + 1) * + v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], + ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1); + } + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = f2cmax(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.f; + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.f) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ + + j * v_dim1]; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j; + i__2 = *k - i__; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], & + c__1); + } else { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.f) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k + + i__) * v_dim1]; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1); + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = f2cmin(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } + } + } + return 0; + +/* End of SLARFT_LVL2 */ + +} /* slarft_lvl2__ */ + diff --git a/lapack-netlib/SRC/slarft_lvl2.f b/lapack-netlib/SRC/slarft_lvl2.f new file mode 100644 index 0000000000..7107a91d5d --- /dev/null +++ b/lapack-netlib/SRC/slarft_lvl2.f @@ -0,0 +1,326 @@ +*> \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download SLARFT_LVL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, +* T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFT_LVL2 forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, + $ T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I-J, K-I, + $ -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL SGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of SLARFT_LVL2 +* + END diff --git a/lapack-netlib/SRC/zlarft.c b/lapack-netlib/SRC/zlarft.c index f8ca30b318..404f96358c 100644 --- a/lapack-netlib/SRC/zlarft.c +++ b/lapack-netlib/SRC/zlarft.c @@ -247,7 +247,6 @@ typedef struct Namelist Namelist; #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } #define sig_die(s, kill) { exit(1); } #define s_stop(s, n) {exit(0);} -static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; #define z_abs(z) (cabs(Cd(z))) #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} @@ -420,16 +419,16 @@ static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integ _Dcomplex zdotc = {0.0, 0.0}; if (incx == 1 && incy == 1) { for (i=0;i \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH */ @@ -539,7 +539,6 @@ static integer c__1 = 1; /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ -/* > \htmlonly */ /* > Download ZLARFT + dependencies */ /* > */ @@ -550,12 +549,11 @@ f"> */ /* > */ /* > [TXT] */ -/* > \endhtmlonly */ /* Definition: */ /* =========== */ -/* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ +/* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ /* CHARACTER DIRECT, STOREV */ /* INTEGER K, LDT, LDV, N */ @@ -663,9 +661,7 @@ f"> */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ -/* > \date June 2016 */ - -/* > \ingroup complex16OTHERauxiliary */ +/* > \ingroup larft */ /* > \par Further Details: */ /* ===================== */ @@ -694,257 +690,542 @@ f"> */ /* > \endverbatim */ /* > */ /* ===================================================================== */ -/* Subroutine */ void zlarft_(char *direct, char *storev, integer *n, integer * +/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer * k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * t, integer *ldt) { /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2, z__3; + address a__1[2]; + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2[2], i__3, i__4; + doublecomplex z__1; + char ch__1[2]; /* Local variables */ - integer i__, j; + integer i__, j, l; + logical lq, ql, qr; + integer nx; + extern /* Subroutine */ int zlarft_lvl2__(char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *); + logical dirf, colv; extern logical lsame_(char *, char *); - extern /* Subroutine */ void zgemm_(char *, char *, integer *, integer *, + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - integer lastv; - extern /* Subroutine */ void ztrmv_(char *, char *, char *, integer *, + integer *), ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); - integer prevlastv; - extern /* Subroutine */ void mecago_(); -/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK auxiliary routine -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ -/* June 2016 */ -/* ===================================================================== */ + + + + + + + + + + +/* The general scheme used is inspired by the approach inside DGEQRT3 */ +/* which was (at the time of writing this code): */ +/* Based on the algorithm of Elmroth and Gustavson, */ +/* IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ + /* Quick return if possible */ /* Parameter adjustments */ v_dim1 = *ldv; - v_offset = 1 + v_dim1 * 1; + v_offset = 1 + v_dim1; v -= v_offset; --tau; t_dim1 = *ldt; - t_offset = 1 + t_dim1 * 1; + t_offset = 1 + t_dim1; t -= t_offset; /* Function Body */ - if (*n == 0) { - return; + if (*n == 0 || *k == 0) { + return 0; + } + +/* Base case */ + + if (*n == 1 || *k == 1) { + i__1 = t_dim1 + 1; + t[i__1].r = tau[1].r, t[i__1].i = tau[1].i; + return 0; } - if (lsame_(direct, "F")) { - prevlastv = *n; - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - prevlastv = f2cmax(prevlastv,i__); - i__2 = i__; - if (tau[i__2].r == 0. && tau[i__2].i == 0.) { +/* Determine when to cross over into the level 2 based implementation */ + +/* Writing concatenation */ + i__2[0] = 1, a__1[0] = direct; + i__2[1] = 1, a__1[1] = storev; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nx = ilaenv_(&c__3, "ZLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (*k < nx) { + zlarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + return 0; + } -/* H(i) = I */ +/* Beginning of executable statements */ - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - t[i__3].r = 0., t[i__3].i = 0.; - } - } else { - -/* general case */ - - if (lsame_(storev, "C")) { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - i__3 = lastv + i__ * v_dim1; - if (v[i__3].r != 0. || v[i__3].i != 0.) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - i__4 = i__; - z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i; - d_cnjg(&z__3, &v[i__ + j * v_dim1]); - z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = - z__2.r * z__3.i + z__2.i * z__3.r; - t[i__3].r = z__1.r, t[i__3].i = z__1.i; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */ - - i__2 = j - i__; - i__3 = i__ - 1; - i__4 = i__; - z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; - zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ - + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], & - c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1); - } else { -/* Skip any trailing zeros. */ - i__2 = i__ + 1; - for (lastv = *n; lastv >= i__2; --lastv) { - i__3 = i__ + lastv * v_dim1; - if (v[i__3].r != 0. || v[i__3].i != 0.) { - myexit_(); - } - } - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - i__4 = i__; - z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i; - i__5 = j + i__ * v_dim1; - z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i, - z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5] - .r; - t[i__3].r = z__1.r, t[i__3].i = z__1.i; - } - j = f2cmin(lastv,prevlastv); - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */ - - i__2 = i__ - 1; - i__3 = j - i__; - i__4 = i__; - z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; - zgemm_("N", "C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], - ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt); - } + l = *k / 2; -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - if (i__ > 1) { - prevlastv = f2cmax(prevlastv,lastv); - } else { - prevlastv = lastv; - } +/* Determine what kind of Q we need to compute */ +/* We assume that if the user doesn't provide 'F' for DIRECT, */ +/* then they meant to provide 'B' and if they don't provide */ +/* 'C' for STOREV, then they meant to provide 'R' */ + + dirf = lsame_(direct, "F"); + colv = lsame_(storev, "C"); + +/* QR happens when we have forward direction in column storage */ + + qr = dirf && colv; + +/* LQ happens when we have forward direction in row storage */ + + lq = dirf && ! colv; + +/* QL happens when we have backward direction in column storage */ + + ql = ! dirf && colv; + +/* The last case is RQ. Due to how we structured this, if the */ +/* above 3 are false, then RQ must be true, so we never store */ +/* this */ +/* RQ happens when we have backward direction in row storage */ +/* RQ = (.NOT.DIRF).AND.(.NOT.COLV) */ + + if (qr) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} 0 | */ +/* |V_{2,1} V_{2,2}| */ +/* |V_{3,1} V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\C^{l,l} unit lower triangular */ +/* V_{2,1}\in\C^{k-l,l} rectangular */ +/* V_{3,1}\in\C^{n-k,l} rectangular */ + +/* V_{2,2}\in\C^{k-l,k-l} unit lower triangular */ +/* V_{3,2}\in\C^{n-k,k-l} rectangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{l, l} upper triangular */ +/* T_{2,2}\in\C^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\C^{l, k-l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */ +/* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */ + +/* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + zlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + zlarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{2,1}' */ + + i__1 = l; + for (j = 1; j <= i__1; ++j) { + i__3 = *k - l; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = j + (l + i__) * t_dim1; + d_cnjg(&z__1, &v[l + i__ + j * v_dim1]); + t[i__4].r = z__1.r, t[i__4].i = z__1.i; } } - } else { - prevlastv = 1; - for (i__ = *k; i__ >= 1; --i__) { - i__1 = i__; - if (tau[i__1].r == 0. && tau[i__1].i == 0.) { -/* H(i) = I */ +/* T_{1,2} = T_{1,2}*V_{2,2} */ - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - t[i__2].r = 0., t[i__2].i = 0.; - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - i__2 = lastv + i__ * v_dim1; - if (v[i__2].r != 0. || v[i__2].i != 0.) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - i__3 = i__; - z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i; - d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]); - z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, - z__1.i = z__2.r * z__3.i + z__2.i * - z__3.r; - t[i__2].r = z__1.r, t[i__2].i = z__1.i; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */ - - i__1 = *n - *k + i__ - j; - i__2 = *k - i__; - i__3 = i__; - z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; - zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[ - j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * - v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * - t_dim1], &c__1); - } else { -/* Skip any leading zeros. */ - i__1 = i__ - 1; - for (lastv = 1; lastv <= i__1; ++lastv) { - i__2 = i__ + lastv * v_dim1; - if (v[i__2].r != 0. || v[i__2].i != 0.) { - myexit_(); - } - } - i__1 = *k; - for (j = i__ + 1; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - i__3 = i__; - z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i; - i__4 = j + (*n - *k + i__) * v_dim1; - z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i, - z__1.i = z__2.r * v[i__4].i + z__2.i * v[ - i__4].r; - t[i__2].r = z__1.r, t[i__2].i = z__1.i; - } - j = f2cmax(lastv,prevlastv); - -/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */ - - i__1 = *k - i__; - i__2 = *n - *k + i__ - j; - i__3 = i__; - z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; - zgemm_("N", "C", &i__1, &c__1, &i__2, &z__1, &v[i__ + - 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], - ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt); - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - if (i__ > 1) { - prevlastv = f2cmin(prevlastv,lastv); - } else { - prevlastv = lastv; - } - } - i__1 = i__ + i__ * t_dim1; - i__2 = i__; - t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + i__1 = *k - l; + ztrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__1, &c_b1, &v[ + l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + zgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[*k + + 1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b1, & + t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1'*V_2 */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__1 = *k - l; + ztrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__1 = *k - l; + ztrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1, + &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], + ldt); + } else if (lq) { + +/* Break V apart into 6 components */ + +/* V = |----------------------| */ +/* |V_{1,1} V_{1,2} V{1,3}| */ +/* |0 V_{2,2} V{2,3}| */ +/* |----------------------| */ + +/* V_{1,1}\in\C^{l,l} unit upper triangular */ +/* V_{1,2}\in\C^{l,k-l} rectangular */ +/* V_{1,3}\in\C^{l,n-k} rectangular */ + +/* V_{2,2}\in\C^{k-l,k-l} unit upper triangular */ +/* V_{2,3}\in\C^{k-l,n-k} rectangular */ + +/* Where l = floor(k/2) */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} T_{1,2}| */ +/* |0 T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{l, l} upper triangular */ +/* T_{2,2}\in\C^{k-l, k-l} upper triangular */ +/* T_{1,2}\in\C^{l, k-l} rectangular */ + +/* Then, consider the product: */ + +/* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */ +/* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */ + +/* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{1,2} */ + +/* Compute T_{1,1} recursively */ + + zlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + zlarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], + ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt); + +/* Compute T_{1,2} */ +/* T_{1,2} = V_{1,2} */ + + i__1 = *k - l; + zlacpy_("All", &l, &i__1, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * + t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*V_{2,2}' */ + + i__1 = *k - l; + ztrmm_("Right", "Upper", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[l + + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + zgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[(*k + + 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, & + c_b1, &t[(l + 1) * t_dim1 + 1], ldt); + +/* At this point, we have that T_{1,2} = V_1*V_2' */ +/* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */ +/* respectively. */ + +/* T_{1,2} = -T_{1,1}*T_{1,2} */ + + i__1 = *k - l; + ztrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, + &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt); + +/* T_{1,2} = T_{1,2}*T_{2,2} */ + + i__1 = *k - l; + ztrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1, + &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], + ldt); + } else if (ql) { + +/* Break V apart into 6 components */ + +/* V = |---------------| */ +/* |V_{1,1} V_{1,2}| */ +/* |V_{2,1} V_{2,2}| */ +/* |0 V_{3,2}| */ +/* |---------------| */ + +/* V_{1,1}\in\C^{n-k,k-l} rectangular */ +/* V_{2,1}\in\C^{k-l,k-l} unit upper triangular */ + +/* V_{1,2}\in\C^{n-k,l} rectangular */ +/* V_{2,2}\in\C^{k-l,l} rectangular */ +/* V_{3,2}\in\C^{l,l} unit upper triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\C^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\C^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */ +/* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */ + +/* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |-------| */ +/* |V_1 V_2| */ +/* |-------| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V*T*V' */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + zlarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + zlarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, & + tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2}' */ + + i__1 = *k - l; + for (j = 1; j <= i__1; ++j) { + i__3 = l; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = *k - l + i__ + j * t_dim1; + d_cnjg(&z__1, &v[*n - *k + j + (*k - l + i__) * v_dim1]); + t[i__4].r = z__1.r, t[i__4].i = z__1.i; } } - } - return; -/* End of ZLARFT */ +/* T_{2,1} = T_{2,1}*V_{2,1} */ + + i__1 = *k - l; + ztrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__1, &c_b1, &v[ + *n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt); +/* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + zgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[(*k - + l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - + l + 1 + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2'*V_1 */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__1 = *k - l; + ztrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b3, + &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__1 = *k - l; + ztrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b1, + &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } else { + +/* Else means RQ case */ + +/* Break V apart into 6 components */ + +/* V = |-----------------------| */ +/* |V_{1,1} V_{1,2} 0 | */ +/* |V_{2,1} V_{2,2} V_{2,3}| */ +/* |-----------------------| */ + +/* V_{1,1}\in\C^{k-l,n-k} rectangular */ +/* V_{1,2}\in\C^{k-l,k-l} unit lower triangular */ + +/* V_{2,1}\in\C^{l,n-k} rectangular */ +/* V_{2,2}\in\C^{l,k-l} rectangular */ +/* V_{2,3}\in\C^{l,l} unit lower triangular */ + +/* We will construct the T matrix */ +/* T = |---------------| */ +/* |T_{1,1} 0 | */ +/* |T_{2,1} T_{2,2}| */ +/* |---------------| */ + +/* T is the triangular factor obtained from block reflectors. */ +/* To motivate the structure, assume we have already computed T_{1,1} */ +/* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */ + +/* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */ +/* T_{2,2}\in\C^{l, l} non-unit lower triangular */ +/* T_{2,1}\in\C^{k-l, l} rectangular */ + +/* Where l = floor(k/2) */ + +/* Then, consider the product: */ + +/* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */ +/* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */ + +/* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */ + +/* Then, we can define the matrix V as */ +/* V = |---| */ +/* |V_1| */ +/* |V_2| */ +/* |---| */ + +/* So, our product is equivalent to the matrix product */ +/* I - V'*T*V */ +/* This means, we can compute T_{1,1} and T_{2,2}, then use this information */ +/* to compute T_{2,1} */ + +/* Compute T_{1,1} recursively */ + + i__1 = *n - l; + i__3 = *k - l; + zlarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[ + t_offset], ldt); + +/* Compute T_{2,2} recursively */ + + zlarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k + - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt); + +/* Compute T_{2,1} */ +/* T_{2,1} = V_{2,2} */ + + i__1 = *k - l; + zlacpy_("All", &l, &i__1, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], + ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*V_{1,2}' */ + + i__1 = *k - l; + ztrmm_("Right", "Lower", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[(* + n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt); + +/* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */ +/* Note: We assume K <= N, and GEMM will do nothing if N=K */ + + i__1 = *k - l; + i__3 = *n - *k; + zgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[*k - + l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - l + 1 + + t_dim1], ldt); + +/* At this point, we have that T_{2,1} = V_2*V_1' */ +/* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */ +/* respectively. */ + +/* T_{2,1} = -T_{2,2}*T_{2,1} */ + + i__1 = *k - l; + ztrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b3, & + t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + + t_dim1], ldt); + +/* T_{2,1} = T_{2,1}*T_{1,1} */ + + i__1 = *k - l; + ztrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b1, + &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt); + } + return 0; } /* zlarft_ */ diff --git a/lapack-netlib/SRC/zlarft.f b/lapack-netlib/SRC/zlarft.f index 900795afad..efd52037d3 100644 --- a/lapack-netlib/SRC/zlarft.f +++ b/lapack-netlib/SRC/zlarft.f @@ -5,7 +5,6 @@ * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * -*> \htmlonly *> Download ZLARFT + dependencies *> *> [TGZ] @@ -13,7 +12,6 @@ *> [ZIP] *> *> [TXT] -*> \endhtmlonly * * Definition: * =========== @@ -161,6 +159,7 @@ * ===================================================================== RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, $ TAU, T, LDT ) + IMPLICIT NONE * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -179,11 +178,12 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, * .. Parameters .. * COMPLEX*16 ONE, NEG_ONE, ZERO - PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) + PARAMETER(ONE=(1.0D+0,0.0D+0), ZERO = (0.0D+0,0.0D+0), + $ NEG_ONE=(-1.0D+0,0.0D+0)) * * .. Local Scalars .. * - INTEGER I,J,L + INTEGER I,J,L,NX LOGICAL QR,LQ,QL,DIRF,COLV * * .. External Subroutines .. @@ -193,7 +193,8 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, * .. External Functions.. * LOGICAL LSAME - EXTERNAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV * * .. Intrinsic Functions.. * @@ -219,6 +220,14 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, RETURN END IF * +* Determine when to cross over into the level 2 based implementation +* + NX = ILAENV(3, "ZLARFT", DIRECT // STOREV, N, K, -1, -1) + IF(K.LT.NX) THEN + CALL ZLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT) + RETURN + END IF +* * Beginning of executable statements * L = K / 2 diff --git a/lapack-netlib/SRC/zlarft_lvl2.c b/lapack-netlib/SRC/zlarft_lvl2.c new file mode 100644 index 0000000000..193870627b --- /dev/null +++ b/lapack-netlib/SRC/zlarft_lvl2.c @@ -0,0 +1,941 @@ +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef blasint logical; + +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);} +#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + + +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} + +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > Download ZLARFT_LVL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */ +/* T, LDT ) */ + +/* CHARACTER DIRECT, STOREV */ +/* INTEGER K, LDT, LDV, N */ +/* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H */ +/* > of order n, which is defined as a product of k elementary reflectors. */ +/* > */ +/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ +/* > */ +/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ +/* > */ +/* > If STOREV = 'C', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th column of the array V, and */ +/* > */ +/* > H = I - V * T * V**H */ +/* > */ +/* > If STOREV = 'R', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th row of the array V, and */ +/* > */ +/* > H = I - V**H * T * V */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies the order in which the elementary reflectors are */ +/* > multiplied to form the block reflector: */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Specifies how the vectors which define the elementary */ +/* > reflectors are stored (see also Further Details): */ +/* > = 'C': columnwise */ +/* > = 'R': rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the block reflector H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the triangular factor T (= the number of */ +/* > elementary reflectors). K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,N) if STOREV = 'R' */ +/* > The matrix V. See further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is COMPLEX*16 array, dimension (LDT,K) */ +/* > The k by k triangular factor T of the block reflector. */ +/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* > lower triangular. The rest of the array is not used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup larft */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zlarft_lvl2__(char *direct, char *storev, integer *n, + integer *k, doublecomplex *v, integer *ldv, doublecomplex *tau, + doublecomplex *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + integer i__, j, prevlastv; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + integer lastv; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), mecago_(); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = f2cmax(prevlastv,i__); + i__2 = i__; + if (tau[i__2].r == 0. && tau[i__2].i == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0., t[i__3].i = 0.; + } + } else { + +/* general case */ + + if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + i__4 = i__; + z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i; + d_cnjg(&z__3, &v[i__ + j * v_dim1]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */ + + i__2 = j - i__; + i__3 = i__ - 1; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], & + c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + myexit_(); + } + } + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + i__4 = i__; + z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i; + i__5 = j + i__ * v_dim1; + z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i, + z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5] + .r; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + } + j = f2cmin(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */ + + i__2 = i__ - 1; + i__3 = j - i__; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemm_("N", "C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], + ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt); + } + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = f2cmax(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0. && tau[i__1].i == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0., t[i__2].i = 0.; + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + i__3 = i__; + z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i; + d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * + z__3.r; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j; + i__2 = *k - i__; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[ + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * + t_dim1], &c__1); + } else { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) { + myexit_(); + } + } + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + i__3 = i__; + z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i; + i__4 = j + (*n - *k + i__) * v_dim1; + z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i, + z__1.i = z__2.r * v[i__4].i + z__2.i * v[ + i__4].r; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + } + j = f2cmax(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemm_("N", "C", &i__1, &c__1, &i__2, &z__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt); + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + if (i__ > 1) { + prevlastv = f2cmin(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } + } + } + return 0; + +/* End of ZLARFT_LVL2 */ + +} /* zlarft_lvl2__ */ + diff --git a/lapack-netlib/SRC/zlarft_lvl2.f b/lapack-netlib/SRC/zlarft_lvl2.f new file mode 100644 index 0000000000..808c7fdb25 --- /dev/null +++ b/lapack-netlib/SRC/zlarft_lvl2.f @@ -0,0 +1,327 @@ +*> \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> Download ZLARFT_LVL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, +* T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, + $ T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZTRMV, ZGEMM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, + $ -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT_LVL2 +* + END