diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake
index 04dd49cfbe..bf346a1b5b 100644
--- a/cmake/lapack.cmake
+++ b/cmake/lapack.cmake
@@ -70,8 +70,8 @@ set(SLASRC
    slaqgb.f slaqge.f slaqp2.f slaqps.f slaqp2rk.f slaqp3rk.f slaqsb.f slaqsp.f slaqsy.f
    slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
    slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
-   slarf.f  slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
-   slarf1f.f slarf1l.f slarrv.f slartv.f
+   slarf.f  slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarft_lvl2.f slarfx.f
+   slarfy.f slargv.f slarf1f.f slarf1l.f slarrv.f slartv.f
    slarz.f  slarzb.f slarzt.f slasy2.f
    slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
    slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f
@@ -177,7 +177,7 @@ set(CLASRC
    claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
    claqz0.f claqz1.f claqz2.f claqz3.f
    claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
-   clarf.f  clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
+   clarf.f  clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f clarft_lvl2.f
    clarf1f.f clarf1l.f
    clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f
    clarz.f  clarzb.f clarzt.f clascl.f claset.f clasr.f  classq.f90
@@ -262,8 +262,8 @@ set(DLASRC
    dlaqgb.f dlaqge.f dlaqp2.f dlaqp2rk.f dlaqp3rk.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
    dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
    dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
-   dlarf.f  dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
-   dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f
+   dlarf.f  dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarft_lvl2.f
+   dlarfx.f dlarfy.f dlarf1f.f dlarf1l.f dlargv.f dlarrv.f dlartv.f
    dlarz.f  dlarzb.f dlarzt.f dlasy2.f
    dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
    dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f
@@ -372,7 +372,7 @@ set(ZLASRC
    zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
    zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
    zlarcm.f zlarf.f  zlarfb.f zlarfb_gett.f
-   zlarfg.f zlarfgp.f zlarft.f zlarf1f.f zlarf1l.f
+   zlarfg.f zlarfgp.f zlarft.f zlarft_lvl2.f zlarf1f.f zlarf1l.f
    zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f
    zlarz.f  zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
    zlassq.f90 zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
@@ -575,8 +575,8 @@ set(SLASRC
    slaqgb.c slaqge.c slaqp2.c slaqp2rk.c slaqp3rk.c slaqps.c slaqsb.c slaqsp.c slaqsy.c
    slaqr0.c slaqr1.c slaqr2.c slaqr3.c slaqr4.c slaqr5.c
    slaqtr.c slar1v.c slar2v.c ilaslr.c ilaslc.c
-   slarf.c  slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarfx.c slarfy.c slargv.c
-   slarf1f.c slarf1l.c slarrv.c slartv.c
+   slarf.c  slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarft_lvl2.c slarfx.c
+   slarfy.c slargv.c slarf1f.c slarf1l.c slarrv.c slartv.c
    slarz.c  slarzb.c slarzt.c slasy2.c
    slasyf.c slasyf_rook.c slasyf_rk.c slasyf_aa.c
    slatbs.c slatdf.c slatps.c slatrd.c slatrs.c slatrz.c
@@ -682,7 +682,7 @@ set(CLASRC
    claqr0.c claqr1.c claqr2.c claqr3.c claqr4.c claqr5.c
    claqsp.c claqsy.c clar1v.c clar2v.c ilaclr.c ilaclc.c
    clarf.c  clarfb.c clarfb_gett.c clarfg.c clarfgp.c clarft.c
-   clarf1f.c clarf1l.c
+   clarft_lvl2.c clarf1f.c clarf1l.c
    clarfx.c clarfy.c clargv.c clarnv.c clarrv.c clartg.c clartv.c
    clarz.c  clarzb.c clarzt.c clascl.c claset.c clasr.c  classq.c
    clasyf.c clasyf_rook.c clasyf_rk.c clasyf_aa.c
@@ -765,8 +765,8 @@ set(DLASRC
    dlaqgb.c dlaqge.c dlaqp2.c dlaqp2rk.c dlaqp3rk.c dlaqps.c dlaqsb.c dlaqsp.c dlaqsy.c
    dlaqr0.c dlaqr1.c dlaqr2.c dlaqr3.c dlaqr4.c dlaqr5.c
    dlaqtr.c dlar1v.c dlar2v.c iladlr.c iladlc.c
-   dlarf.c  dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarfx.c dlarfy.c
-   dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c
+   dlarf.c  dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarft_lvl2.c 
+   dlarfx.c dlarfy.c dlarf1f.c dlarf1l.c dlargv.c dlarrv.c dlartv.c
    dlarz.c  dlarzb.c dlarzt.c dlasy2.c
    dlasyf.c dlasyf_rook.c dlasyf_rk.c dlasyf_aa.c
    dlatbs.c dlatdf.c dlatps.c dlatrd.c dlatrs.c dlatrz.c
@@ -874,7 +874,7 @@ set(ZLASRC
    zlaqr0.c zlaqr1.c zlaqr2.c zlaqr3.c zlaqr4.c zlaqr5.c
    zlaqsp.c zlaqsy.c zlar1v.c zlar2v.c ilazlr.c ilazlc.c
    zlarcm.c zlarf.c  zlarfb.c zlarfb_gett.c zlarf1f.c zlarf1l.c
-   zlarfg.c zlarfgp.c zlarft.c
+   zlarfg.c zlarfgp.c zlarft.c zlarft_lvl2.c
    zlarfx.c zlarfy.c zlargv.c zlarnv.c zlarrv.c zlartg.c zlartv.c
    zlarz.c  zlarzb.c zlarzt.c zlascl.c zlaset.c zlasr.c
    zlassq.c zlasyf.c zlasyf_rook.c zlasyf_rk.c zlasyf_aa.c
diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile
index ebf3431a92..2200a4628d 100644
--- a/lapack-netlib/SRC/Makefile
+++ b/lapack-netlib/SRC/Makefile
@@ -154,7 +154,7 @@ SLASRC_O = \
    slaqgb.o slaqge.o slaqp2.o slaqp2rk.o slaqp3rk.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
    slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
    slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
-   slarf.o  slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \
+   slarf.o  slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarft_lvl2.o slarfx.o slarfy.o slargv.o \
    slarf1f.o slarf1l.o slarrv.o slartv.o \
    slarz.o  slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \
    slasyf_rk.o \
@@ -270,7 +270,7 @@ CLASRC_O = \
    claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \
    claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \
    claqz0.o claqz1.o claqz2.o claqz3.o \
-   clarf.o  clarfb.o clarfb_gett.o clarfg.o clarft.o clarfgp.o \
+   clarf.o  clarfb.o clarfb_gett.o clarfg.o clarft.o clarft_lvl2.o clarfgp.o \
    clarf1f.o clarf1l.o \
    clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
    clarz.o  clarzb.o clarzt.o clascl.o claset.o clasr.o  classq.o \
@@ -364,7 +364,7 @@ DLASRC_O = \
    dlaqgb.o dlaqge.o dlaqp2.o dlaqp2rk.o dlaqp3rk.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \
    dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
    dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
-   dlarf.o  dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \
+   dlarf.o  dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarft_lvl2.o dlarfx.o dlarfy.o \
    dlarf1f.o dlarf1l.o dlargv.o dlarrv.o dlartv.o \
    dlarz.o  dlarzb.o dlarzt.o dlaswp.o dlasy2.o \
    dlasyf.o dlasyf_rook.o dlasyf_rk.o \
@@ -479,7 +479,7 @@ ZLASRC_O = \
    zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \
    zlaqz0.o zlaqz1.o zlaqz2.o zlaqz3.o \
    zlarcm.o zlarf.o  zlarfb.o zlarfb_gett.o \
-   zlarfg.o zlarft.o zlarfgp.o zlarf1f.o zlarf1l.o \
+   zlarfg.o zlarft.o zlarft_lvl2.o zlarfgp.o zlarf1f.o zlarf1l.o \
    zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
    zlarz.o  zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
    zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o zlasyf_aa.o \
diff --git a/lapack-netlib/SRC/clarft.c b/lapack-netlib/SRC/clarft.c
index f44cb810e7..e8e518c5ca 100644
--- a/lapack-netlib/SRC/clarft.c
+++ b/lapack-netlib/SRC/clarft.c
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -525,12 +524,13 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 */
 
 
-
-
 /* Table of constant values */
 
 static complex c_b1 = {1.f,0.f};
-static integer c__1 = 1;
+static complex c_b3 = {-1.f,0.f};
+static integer c__3 = 3;
+static integer c_n1 = -1;
+static integer c__2 = 2;
 
 /* > \brief \b CLARFT forms the triangular factor T of a block reflector H = I - vtvH */
 
@@ -539,7 +539,6 @@ static integer c__1 = 1;
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 
-/* > \htmlonly */
 /* > Download CLARFT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.
 f"> */
@@ -550,12 +549,11 @@ f"> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.
 f"> */
 /* > [TXT]</a> */
-/* > \endhtmlonly */
 
 /*  Definition: */
 /*  =========== */
 
-/*       SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/*        SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
 
 /*       CHARACTER          DIRECT, STOREV */
 /*       INTEGER            K, LDT, LDV, N */
@@ -663,9 +661,7 @@ f"> */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 
-/* > \date December 2016 */
-
-/* > \ingroup complexOTHERauxiliary */
+/* > \ingroup larft */
 
 /* > \par Further Details: */
 /*  ===================== */
@@ -694,255 +690,539 @@ f"> */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
-/* Subroutine */ void clarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer *
 	k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt)
 {
     /* System generated locals */
-    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1={0.,0.}, q__2={0.,0.}, q__3={0.,0.};
+    address a__1[2];
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2[2], i__3, i__4;
+    complex q__1;
+    char ch__1[2];
 
     /* Local variables */
-    integer i__, j;
-    extern /* Subroutine */ void cgemm_(char *, char *, integer *, integer *, 
+    integer i__, j, l;
+    logical lq, ql, qr;
+    integer nx;
+    extern /* Subroutine */ int clarft_lvl2__(char *, char *, integer *, 
+	    integer *, complex *, integer *, complex *, complex *, integer *);
+    logical dirf, colv;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
 	    integer *, complex *, complex *, integer *, complex *, integer *, 
-	    complex *, complex *, integer *), cgemv_(char *, 
-	    integer *, integer *, complex *, complex *, integer *, complex *, 
-	    integer *, complex *, complex *, integer *);
+	    complex *, complex *, integer *);
     extern logical lsame_(char *, char *);
-    integer lastv;
-    extern /* Subroutine */ void ctrmv_(char *, char *, char *, integer *, 
-	    complex *, integer *, complex *, integer *);
-    integer prevlastv;
-    extern /* Subroutine */ void mecago_();
+    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
+	    integer *, integer *, complex *, complex *, integer *, complex *, 
+	    integer *), clacpy_(char *, 
+	    integer *, integer *, complex *, integer *, complex *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
 
 
-/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+/*  -- LAPACK auxiliary routine -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/*     December 2016 */
 
 
-/*  ===================================================================== */
 
 
+
+
+
+
+
+
+
+
+
+
+/*     The general scheme used is inspired by the approach inside DGEQRT3 */
+/*     which was (at the time of writing this code): */
+/*     Based on the algorithm of Elmroth and Gustavson, */
+/*     IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
 /*     Quick return if possible */
 
     /* Parameter adjustments */
     v_dim1 = *ldv;
-    v_offset = 1 + v_dim1 * 1;
+    v_offset = 1 + v_dim1;
     v -= v_offset;
     --tau;
     t_dim1 = *ldt;
-    t_offset = 1 + t_dim1 * 1;
+    t_offset = 1 + t_dim1;
     t -= t_offset;
 
     /* Function Body */
-    if (*n == 0) {
-	return;
+    if (*n == 0 || *k == 0) {
+	return 0;
     }
 
-    if (lsame_(direct, "F")) {
-	prevlastv = *n;
-	i__1 = *k;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    prevlastv = f2cmax(prevlastv,i__);
-	    i__2 = i__;
-	    if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) {
+/*     Base case */
 
-/*              H(i)  =  I */
+    if (*n == 1 || *k == 1) {
+	i__1 = t_dim1 + 1;
+	t[i__1].r = tau[1].r, t[i__1].i = tau[1].i;
+	return 0;
+    }
 
-		i__2 = i__;
-		for (j = 1; j <= i__2; ++j) {
-		    i__3 = j + i__ * t_dim1;
-		    t[i__3].r = 0.f, t[i__3].i = 0.f;
-		}
-	    } else {
-
-/*              general case */
-
-		if (lsame_(storev, "C")) {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			i__3 = lastv + i__ * v_dim1;
-			if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			i__3 = j + i__ * t_dim1;
-			i__4 = i__;
-			q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
-			r_cnjg(&q__3, &v[i__ + j * v_dim1]);
-			q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = 
-				q__2.r * q__3.i + q__2.i * q__3.r;
-			t[i__3].r = q__1.r, t[i__3].i = q__1.i;
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
-
-		    i__2 = j - i__;
-		    i__3 = i__ - 1;
-		    i__4 = i__;
-		    q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
-		    cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ 
-			    + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
-			    c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
-		} else {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			i__3 = i__ + lastv * v_dim1;
-			if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			i__3 = j + i__ * t_dim1;
-			i__4 = i__;
-			q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
-			i__5 = j + i__ * v_dim1;
-			q__1.r = q__2.r * v[i__5].r - q__2.i * v[i__5].i, 
-				q__1.i = q__2.r * v[i__5].i + q__2.i * v[i__5]
-				.r;
-			t[i__3].r = q__1.r, t[i__3].i = q__1.i;
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
-
-		    i__2 = i__ - 1;
-		    i__3 = j - i__;
-		    i__4 = i__;
-		    q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
-		    cgemm_("N", "C", &i__2, &c__1, &i__3, &q__1, &v[(i__ + 1) 
-			    * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
-			    ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
-		}
+/*     Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+    i__2[0] = 1, a__1[0] = direct;
+    i__2[1] = 1, a__1[1] = storev;
+    s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+    nx = ilaenv_(&c__3, "CLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (*k < nx) {
+	clarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+	return 0;
+    }
 
-/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
-
-		i__2 = i__ - 1;
-		ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
-			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
-		i__2 = i__ + i__ * t_dim1;
-		i__3 = i__;
-		t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
-		if (i__ > 1) {
-		    prevlastv = f2cmax(prevlastv,lastv);
-		} else {
-		    prevlastv = lastv;
-		}
+/*     Beginning of executable statements */
+
+    l = *k / 2;
+
+/*     Determine what kind of Q we need to compute */
+/*     We assume that if the user doesn't provide 'F' for DIRECT, */
+/*     then they meant to provide 'B' and if they don't provide */
+/*     'C' for STOREV, then they meant to provide 'R' */
+
+    dirf = lsame_(direct, "F");
+    colv = lsame_(storev, "C");
+
+/*     QR happens when we have forward direction in column storage */
+
+    qr = dirf && colv;
+
+/*     LQ happens when we have forward direction in row storage */
+
+    lq = dirf && ! colv;
+
+/*     QL happens when we have backward direction in column storage */
+
+    ql = ! dirf && colv;
+
+/*     The last case is RQ. Due to how we structured this, if the */
+/*     above 3 are false, then RQ must be true, so we never store */
+/*     this */
+/*     RQ happens when we have backward direction in row storage */
+/*     RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+    if (qr) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} 0      | */
+/*            |V_{2,1} V_{2,2}| */
+/*            |V_{3,1} V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\C^{l,l}      unit lower triangular */
+/*        V_{2,1}\in\C^{k-l,l}    rectangular */
+/*        V_{3,1}\in\C^{n-k,l}    rectangular */
+
+/*        V_{2,2}\in\C^{k-l,k-l}  unit lower triangular */
+/*        V_{3,2}\in\C^{n-k,k-l}  rectangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{l, l}     upper triangular */
+/*        T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\C^{l, k-l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/*        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/*        Define T{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	clarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	clarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{2,1}' */
+
+	i__1 = l;
+	for (j = 1; j <= i__1; ++j) {
+	    i__3 = *k - l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		i__4 = j + (l + i__) * t_dim1;
+		r_cnjg(&q__1, &v[l + i__ + j * v_dim1]);
+		t[i__4].r = q__1.r, t[i__4].i = q__1.i;
 	    }
 	}
-    } else {
-	prevlastv = 1;
-	for (i__ = *k; i__ >= 1; --i__) {
-	    i__1 = i__;
-	    if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) {
 
-/*              H(i)  =  I */
+/*        T_{1,2} = T_{1,2}*V_{2,2} */
 
-		i__1 = *k;
-		for (j = i__; j <= i__1; ++j) {
-		    i__2 = j + i__ * t_dim1;
-		    t[i__2].r = 0.f, t[i__2].i = 0.f;
-		}
-	    } else {
-
-/*              general case */
-
-		if (i__ < *k) {
-		    if (lsame_(storev, "C")) {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    i__2 = lastv + i__ * v_dim1;
-			    if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    i__2 = j + i__ * t_dim1;
-			    i__3 = i__;
-			    q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
-			    r_cnjg(&q__3, &v[*n - *k + i__ + j * v_dim1]);
-			    q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, 
-				    q__1.i = q__2.r * q__3.i + q__2.i * 
-				    q__3.r;
-			    t[i__2].r = q__1.r, t[i__2].i = q__1.i;
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
-
-			i__1 = *n - *k + i__ - j;
-			i__2 = *k - i__;
-			i__3 = i__;
-			q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
-			cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[
-				j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * 
-				v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * 
-				t_dim1], &c__1);
-		    } else {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    i__2 = i__ + lastv * v_dim1;
-			    if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    i__2 = j + i__ * t_dim1;
-			    i__3 = i__;
-			    q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
-			    i__4 = j + (*n - *k + i__) * v_dim1;
-			    q__1.r = q__2.r * v[i__4].r - q__2.i * v[i__4].i, 
-				    q__1.i = q__2.r * v[i__4].i + q__2.i * v[
-				    i__4].r;
-			    t[i__2].r = q__1.r, t[i__2].i = q__1.i;
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
-
-			i__1 = *k - i__;
-			i__2 = *n - *k + i__ - j;
-			i__3 = i__;
-			q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
-			cgemm_("N", "C", &i__1, &c__1, &i__2, &q__1, &v[i__ + 
-				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
-				ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
-		    }
-
-/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
-		    i__1 = *k - i__;
-		    ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
-			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
-			     t_dim1], &c__1)
-			    ;
-		    if (i__ > 1) {
-			prevlastv = f2cmin(prevlastv,lastv);
-		    } else {
-			prevlastv = lastv;
-		    }
-		}
-		i__1 = i__ + i__ * t_dim1;
-		i__2 = i__;
-		t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+	i__1 = *k - l;
+	ctrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+		l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	cgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[*k + 
+		1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b1, &
+		t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1'*V_2 */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__1 = *k - l;
+	ctrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, 
+		&t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+		 &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], 
+		ldt);
+    } else if (lq) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |----------------------| */
+/*            |V_{1,1} V_{1,2} V{1,3}| */
+/*            |0       V_{2,2} V{2,3}| */
+/*            |----------------------| */
+
+/*        V_{1,1}\in\C^{l,l}      unit upper triangular */
+/*        V_{1,2}\in\C^{l,k-l}    rectangular */
+/*        V_{1,3}\in\C^{l,n-k}    rectangular */
+
+/*        V_{2,2}\in\C^{k-l,k-l}  unit upper triangular */
+/*        V_{2,3}\in\C^{k-l,n-k}  rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{l, l}     upper triangular */
+/*        T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\C^{l, k-l}   rectangular */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/*        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	clarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	clarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{1,2} */
+
+	i__1 = *k - l;
+	clacpy_("All", &l, &i__1, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * 
+		t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*V_{2,2}' */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Upper", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[l 
+		+ 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	cgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[(*k + 
+		1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+		c_b1, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1*V_2' */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__1 = *k - l;
+	ctrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, 
+		&t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+		 &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], 
+		ldt);
+    } else if (ql) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} V_{1,2}| */
+/*            |V_{2,1} V_{2,2}| */
+/*            |0       V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\C^{n-k,k-l}  rectangular */
+/*        V_{2,1}\in\C^{k-l,k-l}  unit upper triangular */
+
+/*        V_{1,2}\in\C^{n-k,l}    rectangular */
+/*        V_{2,2}\in\C^{k-l,l}    rectangular */
+/*        V_{3,2}\in\C^{l,l}      unit upper triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\C^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\C^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/*        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	clarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	clarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+		tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2}' */
+
+	i__1 = *k - l;
+	for (j = 1; j <= i__1; ++j) {
+	    i__3 = l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		i__4 = *k - l + i__ + j * t_dim1;
+		r_cnjg(&q__1, &v[*n - *k + j + (*k - l + i__) * v_dim1]);
+		t[i__4].r = q__1.r, t[i__4].i = q__1.i;
 	    }
 	}
-    }
-    return;
 
-/*     End of CLARFT */
+/*        T_{2,1} = T_{2,1}*V_{2,1} */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+		*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
 
+/*        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	cgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[(*k - 
+		l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - 
+		l + 1 + t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2'*V_1 */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__1 = *k - l;
+	ctrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b3, 
+		&t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+		 &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    } else {
+
+/*        Else means RQ case */
+
+/*        Break V apart into 6 components */
+
+/*        V = |-----------------------| */
+/*            |V_{1,1} V_{1,2} 0      | */
+/*            |V_{2,1} V_{2,2} V_{2,3}| */
+/*            |-----------------------| */
+
+/*        V_{1,1}\in\C^{k-l,n-k}  rectangular */
+/*        V_{1,2}\in\C^{k-l,k-l}  unit lower triangular */
+
+/*        V_{2,1}\in\C^{l,n-k}    rectangular */
+/*        V_{2,2}\in\C^{l,k-l}    rectangular */
+/*        V_{2,3}\in\C^{l,l}      unit lower triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\C^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\C^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/*        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	clarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	clarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k 
+		- l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2} */
+
+	i__1 = *k - l;
+	clacpy_("All", &l, &i__1, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], 
+		ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*V_{1,2}' */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Lower", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[(*
+		n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	cgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[*k - 
+		l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - l + 1 
+		+ t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2*V_1' */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__1 = *k - l;
+	ctrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b3, &
+		t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__1 = *k - l;
+	ctrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b1, 
+		&t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    }
+    return 0;
 } /* clarft_ */
 
diff --git a/lapack-netlib/SRC/clarft.f b/lapack-netlib/SRC/clarft.f
index de8b97bf9c..3bf2448ba8 100644
--- a/lapack-netlib/SRC/clarft.f
+++ b/lapack-netlib/SRC/clarft.f
@@ -5,7 +5,6 @@
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
-*> \htmlonly
 *> Download CLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
 *> [TGZ]</a>
@@ -13,7 +12,6 @@
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.f">
 *> [TXT]</a>
-*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -161,6 +159,7 @@
 *  =====================================================================
       RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
      $                             TAU, T, LDT )
+      IMPLICIT NONE
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@@ -179,11 +178,12 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
 *     .. Parameters ..
 *
       COMPLEX           ONE, NEG_ONE, ZERO
-      PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0)
+      PARAMETER(ONE=(1.0E+0,0.0E+0), ZERO = (0.0E+0,0.0E+0),
+     $            NEG_ONE=(-1.0E+0,0.0E+0))
 *
 *     .. Local Scalars ..
 *
-      INTEGER           I,J,L
+      INTEGER           I,J,L,NX
       LOGICAL           QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
@@ -193,7 +193,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
 *     .. External Functions..
 *
       LOGICAL           LSAME
-      EXTERNAL          LSAME
+      INTEGER           ILAENV
+      EXTERNAL          LSAME, ILAENV
 *
 *     .. Intrinsic Functions..
 *
@@ -219,6 +220,14 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV,
          RETURN
       END IF
 *
+*     Determine when to cross over into the level 2 based implementation
+*
+      NX = ILAENV(3, "CLARFT", DIRECT // STOREV, N, K, -1, -1)
+      IF(K.LT.NX) THEN
+         CALL CLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+         RETURN
+      END IF
+*
 *     Beginning of executable statements
 *
       L = K / 2
diff --git a/lapack-netlib/SRC/clarft_lvl2.c b/lapack-netlib/SRC/clarft_lvl2.c
new file mode 100644
index 0000000000..2ca5490ee0
--- /dev/null
+++ b/lapack-netlib/SRC/clarft_lvl2.c
@@ -0,0 +1,935 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				+ Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i])._Val[1] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				+ Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				- Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
+				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
+				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]
+				+ Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				- Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				+ Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
+				- Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]
+				+ Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+				- Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]
+				+ Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+
+/* Table of constant values */
+
+static complex c_b1 = {1.f,0.f};
+static integer c__1 = 1;
+
+/* > \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download CLARFT_LVL2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.
+f"> */
+/* > [TXT]</a> */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/*                    T, LDT ) */
+
+/*       CHARACTER          DIRECT, STOREV */
+/*       INTEGER            K, LDT, LDV, N */
+/*       COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > CLARFT_LVL2 forms the triangular factor T of a complex block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* >    H  =  I - V * T * V**H */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* >    H  =  I - V**H * T * V */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* >          DIRECT is CHARACTER*1 */
+/* >          Specifies the order in which the elementary reflectors are */
+/* >          multiplied to form the block reflector: */
+/* >          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* >          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* >          STOREV is CHARACTER*1 */
+/* >          Specifies how the vectors which define the elementary */
+/* >          reflectors are stored (see also Further Details): */
+/* >          = 'C': columnwise */
+/* >          = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The order of the triangular factor T (= the number of */
+/* >          elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is COMPLEX array, dimension */
+/* >                               (LDV,K) if STOREV = 'C' */
+/* >                               (LDV,N) if STOREV = 'R' */
+/* >          The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* >          TAU is COMPLEX array, dimension (K) */
+/* >          TAU(i) must contain the scalar factor of the elementary */
+/* >          reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is COMPLEX array, dimension (LDT,K) */
+/* >          The k by k triangular factor T of the block reflector. */
+/* >          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* >          lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The shape of the matrix V and the storage of the vectors which define */
+/* >  the H(i) is best illustrated by the following example with n = 5 and */
+/* >  k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* >  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* >               V = (  1       )                 V = (  1 v1 v1 v1 v1 ) */
+/* >                   ( v1  1    )                     (     1 v2 v2 v2 ) */
+/* >                   ( v1 v2  1 )                     (        1 v3 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* >               V = ( v1 v2 v3 )                 V = ( v1 v1  1       ) */
+/* >                   ( v1 v2 v3 )                     ( v2 v2 v2  1    ) */
+/* >                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 ) */
+/* >                   (     1 v3 ) */
+/* >                   (        1 ) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int clarft_lvl2__(char *direct, char *storev, integer *n, 
+	integer *k, complex *v, integer *ldv, complex *tau, complex *t, 
+	integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2, q__3;
+
+    /* Local variables */
+    integer i__, j, prevlastv;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
+	    integer *, complex *, complex *, integer *, complex *, integer *, 
+	    complex *, complex *, integer *), cgemv_(char *, 
+	    integer *, integer *, complex *, complex *, integer *, complex *, 
+	    integer *, complex *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    integer lastv;
+    extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, 
+	    complex *, integer *, complex *, integer *), mecago_();
+
+
+/*  -- LAPACK auxiliary routine -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/*  ===================================================================== */
+
+
+/*     Quick return if possible */
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	prevlastv = *n;
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    prevlastv = f2cmax(prevlastv,i__);
+	    i__2 = i__;
+	    if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    i__3 = j + i__ * t_dim1;
+		    t[i__3].r = 0.f, t[i__3].i = 0.f;
+		}
+	    } else {
+
+/*              general case */
+
+		if (lsame_(storev, "C")) {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			i__3 = lastv + i__ * v_dim1;
+			if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			i__3 = j + i__ * t_dim1;
+			i__4 = i__;
+			q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
+			r_cnjg(&q__3, &v[i__ + j * v_dim1]);
+			q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = 
+				q__2.r * q__3.i + q__2.i * q__3.r;
+			t[i__3].r = q__1.r, t[i__3].i = q__1.i;
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
+
+		    i__2 = j - i__;
+		    i__3 = i__ - 1;
+		    i__4 = i__;
+		    q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
+		    cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ 
+			    + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
+			    c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
+		} else {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			i__3 = i__ + lastv * v_dim1;
+			if (v[i__3].r != 0.f || v[i__3].i != 0.f) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			i__3 = j + i__ * t_dim1;
+			i__4 = i__;
+			q__2.r = -tau[i__4].r, q__2.i = -tau[i__4].i;
+			i__5 = j + i__ * v_dim1;
+			q__1.r = q__2.r * v[i__5].r - q__2.i * v[i__5].i, 
+				q__1.i = q__2.r * v[i__5].i + q__2.i * v[i__5]
+				.r;
+			t[i__3].r = q__1.r, t[i__3].i = q__1.i;
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
+
+		    i__2 = i__ - 1;
+		    i__3 = j - i__;
+		    i__4 = i__;
+		    q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
+		    cgemm_("N", "C", &i__2, &c__1, &i__3, &q__1, &v[(i__ + 1) 
+			    * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
+			    ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
+		}
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		i__2 = i__ + i__ * t_dim1;
+		i__3 = i__;
+		t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+		if (i__ > 1) {
+		    prevlastv = f2cmax(prevlastv,lastv);
+		} else {
+		    prevlastv = lastv;
+		}
+	    }
+	}
+    } else {
+	prevlastv = 1;
+	for (i__ = *k; i__ >= 1; --i__) {
+	    i__1 = i__;
+	    if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    i__2 = j + i__ * t_dim1;
+		    t[i__2].r = 0.f, t[i__2].i = 0.f;
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    i__2 = lastv + i__ * v_dim1;
+			    if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    i__2 = j + i__ * t_dim1;
+			    i__3 = i__;
+			    q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
+			    r_cnjg(&q__3, &v[*n - *k + i__ + j * v_dim1]);
+			    q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, 
+				    q__1.i = q__2.r * q__3.i + q__2.i * 
+				    q__3.r;
+			    t[i__2].r = q__1.r, t[i__2].i = q__1.i;
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
+
+			i__1 = *n - *k + i__ - j;
+			i__2 = *k - i__;
+			i__3 = i__;
+			q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+			cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[
+				j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * 
+				v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * 
+				t_dim1], &c__1);
+		    } else {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    i__2 = i__ + lastv * v_dim1;
+			    if (v[i__2].r != 0.f || v[i__2].i != 0.f) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    i__2 = j + i__ * t_dim1;
+			    i__3 = i__;
+			    q__2.r = -tau[i__3].r, q__2.i = -tau[i__3].i;
+			    i__4 = j + (*n - *k + i__) * v_dim1;
+			    q__1.r = q__2.r * v[i__4].r - q__2.i * v[i__4].i, 
+				    q__1.i = q__2.r * v[i__4].i + q__2.i * v[
+				    i__4].r;
+			    t[i__2].r = q__1.r, t[i__2].i = q__1.i;
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
+
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__ - j;
+			i__3 = i__;
+			q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+			cgemm_("N", "C", &i__1, &c__1, &i__2, &q__1, &v[i__ + 
+				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
+				ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		    if (i__ > 1) {
+			prevlastv = f2cmin(prevlastv,lastv);
+		    } else {
+			prevlastv = lastv;
+		    }
+		}
+		i__1 = i__ + i__ * t_dim1;
+		i__2 = i__;
+		t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+	    }
+	}
+    }
+    return 0;
+
+/*     End of CLARFT_LVL2 */
+
+} /* clarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/clarft_lvl2.f b/lapack-netlib/SRC/clarft_lvl2.f
new file mode 100644
index 0000000000..3b0aea1132
--- /dev/null
+++ b/lapack-netlib/SRC/clarft_lvl2.f
@@ -0,0 +1,328 @@
+*> \brief \b CLARFT_LVL2: Level 2 BLAS version for terminating case of CLARFT
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> Download CLARFT_LVL2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.f">
+*> [TXT]</a>
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+*                    T, LDT )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          DIRECT, STOREV
+*       INTEGER            K, LDT, LDV, N
+*       ..
+*       .. Array Arguments ..
+*       COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> CLARFT_LVL2 forms the triangular factor T of a complex block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*>    H  =  I - V * T * V**H
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*>    H  =  I - V**H * T * V
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*>          DIRECT is CHARACTER*1
+*>          Specifies the order in which the elementary reflectors are
+*>          multiplied to form the block reflector:
+*>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*>          STOREV is CHARACTER*1
+*>          Specifies how the vectors which define the elementary
+*>          reflectors are stored (see also Further Details):
+*>          = 'C': columnwise
+*>          = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The order of the triangular factor T (= the number of
+*>          elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*>          V is COMPLEX array, dimension
+*>                               (LDV,K) if STOREV = 'C'
+*>                               (LDV,N) if STOREV = 'R'
+*>          The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V.
+*>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*>          TAU is COMPLEX array, dimension (K)
+*>          TAU(i) must contain the scalar factor of the elementary
+*>          reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*>          T is COMPLEX array, dimension (LDT,K)
+*>          The k by k triangular factor T of the block reflector.
+*>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*>          lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  The shape of the matrix V and the storage of the vectors which define
+*>  the H(i) is best illustrated by the following example with n = 5 and
+*>  k = 3. The elements equal to 1 are not stored.
+*>
+*>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*>
+*>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*>                   ( v1  1    )                     (     1 v2 v2 v2 )
+*>                   ( v1 v2  1 )                     (        1 v3 v3 )
+*>                   ( v1 v2 v3 )
+*>                   ( v1 v2 v3 )
+*>
+*>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*>
+*>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*>                   (     1 v3 )
+*>                   (        1 )
+*> \endverbatim
+*>
+*  =====================================================================
+      SUBROUTINE CLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+     $            T, LDT )
+*
+*  -- LAPACK auxiliary routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, PREVLASTV, LASTV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGEMV, CTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         PREVLASTV = N
+         DO I = 1, K
+            PREVLASTV = MAX( PREVLASTV, I )
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = 1, I
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( LASTV, I ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
+*
+                  CALL CGEMV( 'Conjugate transpose', J-I, I-1,
+     $                        -TAU( I ), V( I+1, 1 ), LDV,
+     $                        V( I+1, I ), 1,
+     $                        ONE, T( 1, I ), 1 )
+               ELSE
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( I, LASTV ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( J , I )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
+*
+                  CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
+     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+     $                        ONE, T( 1, I ), LDT )
+               END IF
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+     $                     T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+               IF( I.GT.1 ) THEN
+                  PREVLASTV = MAX( PREVLASTV, LASTV )
+               ELSE
+                  PREVLASTV = LASTV
+               END IF
+            END IF
+         END DO
+      ELSE
+         PREVLASTV = 1
+         DO I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = I, K
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( LASTV, I ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
+*
+                     CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
+     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
+     $                           1, ONE, T( I+1, I ), 1 )
+                  ELSE
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( I, LASTV ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
+*
+                     CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J,
+     $                           -TAU( I ),
+     $                           V( I+1, J ), LDV, V( I, J ), LDV,
+     $                           ONE, T( I+1, I ), LDT )
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL CTRMV( 'Lower', 'No transpose', 'Non-unit',
+     $                        K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+                  IF( I.GT.1 ) THEN
+                     PREVLASTV = MIN( PREVLASTV, LASTV )
+                  ELSE
+                     PREVLASTV = LASTV
+                  END IF
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of CLARFT_LVL2
+*
+      END
diff --git a/lapack-netlib/SRC/dlarft.c b/lapack-netlib/SRC/dlarft.c
index 65ce8807fc..9a968c286f 100644
--- a/lapack-netlib/SRC/dlarft.c
+++ b/lapack-netlib/SRC/dlarft.c
@@ -190,8 +190,8 @@ typedef struct Namelist Namelist;
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #ifdef _MSC_VER
-#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
-#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
 #else
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -378,19 +377,24 @@ static integer smaxloc_(float *w, integer s, integer e, integer *n)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
+
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
-			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				+ Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i])._Val[1] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
-			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				+ Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				- Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
@@ -415,13 +419,17 @@ static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
-			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
+				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
-			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
+				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
@@ -446,13 +454,17 @@ static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, c
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
-			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]
+				+ Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
-			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				- Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				+ Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
@@ -477,13 +489,17 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
-			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
+				- Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]
+				+ Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
-			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+				- Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]
+				+ Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
@@ -508,12 +524,13 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 */
 
 
-
-
 /* Table of constant values */
 
-static integer c__1 = 1;
-static doublereal c_b6 = 1.;
+static integer c__3 = 3;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+static doublereal c_b13 = 1.;
+static doublereal c_b22 = -1.;
 
 /* > \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH */
 
@@ -522,7 +539,6 @@ static doublereal c_b6 = 1.;
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 
-/* > \htmlonly */
 /* > Download DLARFT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.
 f"> */
@@ -533,12 +549,11 @@ f"> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.
 f"> */
 /* > [TXT]</a> */
-/* > \endhtmlonly */
 
 /*  Definition: */
 /*  =========== */
 
-/*       SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/*        SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
 
 /*       CHARACTER          DIRECT, STOREV */
 /*       INTEGER            K, LDT, LDV, N */
@@ -646,9 +661,7 @@ f"> */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 
-/* > \date December 2016 */
-
-/* > \ingroup doubleOTHERauxiliary */
+/* > \ingroup larft */
 
 /* > \par Further Details: */
 /*  ===================== */
@@ -677,212 +690,536 @@ f"> */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
-/* Subroutine */ void dlarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer *
 	k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, 
 	integer *ldt)
 {
     /* System generated locals */
-    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
-    doublereal d__1;
+    address a__1[2];
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1[2], i__2, i__3;
+    char ch__1[2];
 
     /* Local variables */
-    integer i__, j;
+    integer i__, j, l;
+    logical lq, ql, qr;
+    integer nx;
+    extern /* Subroutine */ int dlarft_lvl2__(char *, char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *);
+    logical dirf, colv;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
     extern logical lsame_(char *, char *);
-    extern /* Subroutine */ void dgemv_(char *, integer *, integer *, 
-	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
-	    doublereal *, doublereal *, integer *);
-    integer lastv;
-    extern /* Subroutine */ void dtrmv_(char *, char *, char *, integer *, 
-	    doublereal *, integer *, doublereal *, integer *);
-    integer prevlastv;
+    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *), dlacpy_(
+	    char *, integer *, integer *, doublereal *, integer *, doublereal 
+	    *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
 
 
-/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+/*  -- LAPACK auxiliary routine -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/*     December 2016 */
 
 
-/*  ===================================================================== */
 
 
+
+
+
+
+
+
+
+
+/*     The general scheme used is inspired by the approach inside DGEQRT3 */
+/*     which was (at the time of writing this code): */
+/*     Based on the algorithm of Elmroth and Gustavson, */
+/*     IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
 /*     Quick return if possible */
 
     /* Parameter adjustments */
     v_dim1 = *ldv;
-    v_offset = 1 + v_dim1 * 1;
+    v_offset = 1 + v_dim1;
     v -= v_offset;
     --tau;
     t_dim1 = *ldt;
-    t_offset = 1 + t_dim1 * 1;
+    t_offset = 1 + t_dim1;
     t -= t_offset;
 
     /* Function Body */
-    if (*n == 0) {
-	return;
+    if (*n == 0 || *k == 0) {
+	return 0;
     }
 
-    if (lsame_(direct, "F")) {
-	prevlastv = *n;
-	i__1 = *k;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    prevlastv = f2cmax(i__,prevlastv);
-	    if (tau[i__] == 0.) {
+/*     Base case */
 
-/*              H(i)  =  I */
+    if (*n == 1 || *k == 1) {
+	t[t_dim1 + 1] = tau[1];
+	return 0;
+    }
 
-		i__2 = i__;
-		for (j = 1; j <= i__2; ++j) {
-		    t[j + i__ * t_dim1] = 0.;
-		}
-	    } else {
-
-/*              general case */
-
-		if (lsame_(storev, "C")) {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			if (v[lastv + i__ * v_dim1] != 0.) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
-
-		    i__2 = j - i__;
-		    i__3 = i__ - 1;
-		    d__1 = -tau[i__];
-		    dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 + 
-			    v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
-			    c_b6, &t[i__ * t_dim1 + 1], &c__1);
-		} else {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			if (v[i__ + lastv * v_dim1] != 0.) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
-
-		    i__2 = i__ - 1;
-		    i__3 = j - i__;
-		    d__1 = -tau[i__];
-		    dgemv_("No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) *
-			     v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
-			    ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
-		}
+/*     Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+    i__1[0] = 1, a__1[0] = direct;
+    i__1[1] = 1, a__1[1] = storev;
+    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+    nx = ilaenv_(&c__3, "DLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (*k < nx) {
+	dlarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+	return 0;
+    }
 
-/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+/*     Beginning of executable statements */
 
-		i__2 = i__ - 1;
-		dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
-			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
-		t[i__ + i__ * t_dim1] = tau[i__];
-		if (i__ > 1) {
-		    prevlastv = f2cmax(prevlastv,lastv);
-		} else {
-		    prevlastv = lastv;
-		}
+    l = *k / 2;
+
+/*     Determine what kind of Q we need to compute */
+/*     We assume that if the user doesn't provide 'F' for DIRECT, */
+/*     then they meant to provide 'B' and if they don't provide */
+/*     'C' for STOREV, then they meant to provide 'R' */
+
+    dirf = lsame_(direct, "F");
+    colv = lsame_(storev, "C");
+
+/*     QR happens when we have forward direction in column storage */
+
+    qr = dirf && colv;
+
+/*     LQ happens when we have forward direction in row storage */
+
+    lq = dirf && ! colv;
+
+/*     QL happens when we have backward direction in column storage */
+
+    ql = ! dirf && colv;
+
+/*     The last case is RQ. Due to how we structured this, if the */
+/*     above 3 are false, then RQ must be true, so we never store */
+/*     this */
+/*     RQ happens when we have backward direction in row storage */
+/*     RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+    if (qr) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} 0      | */
+/*            |V_{2,1} V_{2,2}| */
+/*            |V_{3,1} V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\R^{l,l}      unit lower triangular */
+/*        V_{2,1}\in\R^{k-l,l}    rectangular */
+/*        V_{3,1}\in\R^{n-k,l}    rectangular */
+
+/*        V_{2,2}\in\R^{k-l,k-l}  unit lower triangular */
+/*        V_{3,2}\in\R^{n-k,k-l}  rectangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{l, l}     upper triangular */
+/*        T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\R^{l, k-l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/*        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	dlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	dlarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{2,1}' */
+
+	i__2 = l;
+	for (j = 1; j <= i__2; ++j) {
+	    i__3 = *k - l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		t[j + (l + i__) * t_dim1] = v[l + i__ + j * v_dim1];
 	    }
 	}
-    } else {
-	prevlastv = 1;
-	for (i__ = *k; i__ >= 1; --i__) {
-	    if (tau[i__] == 0.) {
 
-/*              H(i)  =  I */
+/*        T_{1,2} = T_{1,2}*V_{2,2} */
 
-		i__1 = *k;
-		for (j = i__; j <= i__1; ++j) {
-		    t[j + i__ * t_dim1] = 0.;
-		}
-	    } else {
-
-/*              general case */
-
-		if (i__ < *k) {
-		    if (lsame_(storev, "C")) {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    if (v[lastv + i__ * v_dim1] != 0.) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ 
-				    + j * v_dim1];
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
-
-			i__1 = *n - *k + i__ - j;
-			i__2 = *k - i__;
-			d__1 = -tau[i__];
-			dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ 
-				+ 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
-				c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
-				c__1);
-		    } else {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    if (v[i__ + lastv * v_dim1] != 0.) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k 
-				    + i__) * v_dim1];
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
-
-			i__1 = *k - i__;
-			i__2 = *n - *k + i__ - j;
-			d__1 = -tau[i__];
-			dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + 
-				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
-				ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
-		    }
-
-/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
-		    i__1 = *k - i__;
-		    dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
-			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
-			     t_dim1], &c__1)
-			    ;
-		    if (i__ > 1) {
-			prevlastv = f2cmin(prevlastv,lastv);
-		    } else {
-			prevlastv = lastv;
-		    }
-		}
-		t[i__ + i__ * t_dim1] = tau[i__];
+	i__2 = *k - l;
+	dtrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__2, &c_b13, &
+		v[l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], 
+		ldt);
+
+/*        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	dgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[*k + 
+		1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b13, 
+		&t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1'*V_2 */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__2 = *k - l;
+	dtrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+		 &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+		c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 
+		+ 1], ldt);
+    } else if (lq) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |----------------------| */
+/*            |V_{1,1} V_{1,2} V{1,3}| */
+/*            |0       V_{2,2} V{2,3}| */
+/*            |----------------------| */
+
+/*        V_{1,1}\in\R^{l,l}      unit upper triangular */
+/*        V_{1,2}\in\R^{l,k-l}    rectangular */
+/*        V_{1,3}\in\R^{l,n-k}    rectangular */
+
+/*        V_{2,2}\in\R^{k-l,k-l}  unit upper triangular */
+/*        V_{2,3}\in\R^{k-l,n-k}  rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{l, l}     upper triangular */
+/*        T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\R^{l, k-l}   rectangular */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/*        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	dlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	dlarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{1,2} */
+
+	i__2 = *k - l;
+	dlacpy_("All", &l, &i__2, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * 
+		t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*V_{2,2}' */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Upper", "Transpose", "Unit", &l, &i__2, &c_b13, &v[l 
+		+ 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	dgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[(*k 
+		+ 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+		c_b13, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1*V_2' */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__2 = *k - l;
+	dtrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+		 &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+		c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 
+		+ 1], ldt);
+    } else if (ql) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} V_{1,2}| */
+/*            |V_{2,1} V_{2,2}| */
+/*            |0       V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\R^{n-k,k-l}  rectangular */
+/*        V_{2,1}\in\R^{k-l,k-l}  unit upper triangular */
+
+/*        V_{1,2}\in\R^{n-k,l}    rectangular */
+/*        V_{2,2}\in\R^{k-l,l}    rectangular */
+/*        V_{3,2}\in\R^{l,l}      unit upper triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\R^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\R^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/*        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	dlarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	dlarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+		tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2}' */
+
+	i__2 = *k - l;
+	for (j = 1; j <= i__2; ++j) {
+	    i__3 = l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		t[*k - l + i__ + j * t_dim1] = v[*n - *k + j + (*k - l + i__) 
+			* v_dim1];
 	    }
 	}
-    }
-    return;
 
-/*     End of DLARFT */
+/*        T_{2,1} = T_{2,1}*V_{2,1} */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__2, &c_b13, &
+		v[*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
 
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	dgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[(*k 
+		- l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b13, &t[*k 
+		- l + 1 + t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2'*V_1 */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__2 = *k - l;
+	dtrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+		 &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__2, &
+		c_b13, &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    } else {
+
+/*        Else means RQ case */
+
+/*        Break V apart into 6 components */
+
+/*        V = |-----------------------| */
+/*            |V_{1,1} V_{1,2} 0      | */
+/*            |V_{2,1} V_{2,2} V_{2,3}| */
+/*            |-----------------------| */
+
+/*        V_{1,1}\in\R^{k-l,n-k}  rectangular */
+/*        V_{1,2}\in\R^{k-l,k-l}  unit lower triangular */
+
+/*        V_{2,1}\in\R^{l,n-k}    rectangular */
+/*        V_{2,2}\in\R^{l,k-l}    rectangular */
+/*        V_{2,3}\in\R^{l,l}      unit lower triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\R^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\R^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/*        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	dlarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	dlarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k 
+		- l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2} */
+
+	i__2 = *k - l;
+	dlacpy_("All", &l, &i__2, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], 
+		ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*V_{1,2}' */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Lower", "Transpose", "Unit", &l, &i__2, &c_b13, &v[(*
+		n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	dgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[*k - 
+		l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b13, &t[*k - l + 
+		1 + t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2*V_1' */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__2 = *k - l;
+	dtrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b22, 
+		&t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__2 = *k - l;
+	dtrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b13,
+		 &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    }
+    return 0;
 } /* dlarft_ */
 
diff --git a/lapack-netlib/SRC/dlarft.f b/lapack-netlib/SRC/dlarft.f
index c27bb1a806..fa2ea0d134 100644
--- a/lapack-netlib/SRC/dlarft.f
+++ b/lapack-netlib/SRC/dlarft.f
@@ -5,7 +5,6 @@
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
-*> \htmlonly
 *> Download DLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.f">
 *> [TGZ]</a>
@@ -13,7 +12,6 @@
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.f">
 *> [TXT]</a>
-*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -161,6 +159,7 @@
 *  =====================================================================
       RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
      $                             TAU, T, LDT )
+      IMPLICIT NONE
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@@ -183,7 +182,7 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
 *
 *     .. Local Scalars ..
 *
-      INTEGER           I,J,L
+      INTEGER           I,J,L,NX
       LOGICAL           QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
@@ -193,7 +192,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
 *     .. External Functions..
 *
       LOGICAL           LSAME
-      EXTERNAL          LSAME
+      INTEGER           ILAENV
+      EXTERNAL          LSAME, ILAENV
 *     
 *     The general scheme used is inspired by the approach inside DGEQRT3
 *     which was (at the time of writing this code):
@@ -215,6 +215,14 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV,
          RETURN
       END IF
 *
+*     Determine when to cross over into the level 2 based implementation
+*
+      NX = ILAENV(3, "DLARFT", DIRECT // STOREV, N, K, -1, -1)
+      IF(K.LT.NX) THEN
+         CALL DLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+         RETURN
+      END IF
+*
 *     Beginning of executable statements
 *
       L = K / 2
diff --git a/lapack-netlib/SRC/dlarft_lvl2.c b/lapack-netlib/SRC/dlarft_lvl2.c
new file mode 100644
index 0000000000..6bd748c436
--- /dev/null
+++ b/lapack-netlib/SRC/dlarft_lvl2.c
@@ -0,0 +1,895 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				+ Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i])._Val[1] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				+ Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				- Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
+				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
+				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]
+				+ Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				- Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				+ Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
+				- Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]
+				+ Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+				- Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]
+				+ Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b6 = 1.;
+
+/* > \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download DLARFT_LVL2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarft.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.
+f"> */
+/* > [TXT]</a> */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/*                    T, LDT ) */
+
+/*       CHARACTER          DIRECT, STOREV */
+/*       INTEGER            K, LDT, LDV, N */
+/*       DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DLARFT_LVL2 forms the triangular factor T of a real block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* >    H  =  I - V * T * V**T */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* >    H  =  I - V**T * T * V */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* >          DIRECT is CHARACTER*1 */
+/* >          Specifies the order in which the elementary reflectors are */
+/* >          multiplied to form the block reflector: */
+/* >          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* >          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* >          STOREV is CHARACTER*1 */
+/* >          Specifies how the vectors which define the elementary */
+/* >          reflectors are stored (see also Further Details): */
+/* >          = 'C': columnwise */
+/* >          = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The order of the triangular factor T (= the number of */
+/* >          elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension */
+/* >                               (LDV,K) if STOREV = 'C' */
+/* >                               (LDV,N) if STOREV = 'R' */
+/* >          The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (K) */
+/* >          TAU(i) must contain the scalar factor of the elementary */
+/* >          reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,K) */
+/* >          The k by k triangular factor T of the block reflector. */
+/* >          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* >          lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The shape of the matrix V and the storage of the vectors which define */
+/* >  the H(i) is best illustrated by the following example with n = 5 and */
+/* >  k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* >  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* >               V = (  1       )                 V = (  1 v1 v1 v1 v1 ) */
+/* >                   ( v1  1    )                     (     1 v2 v2 v2 ) */
+/* >                   ( v1 v2  1 )                     (        1 v3 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* >               V = ( v1 v2 v3 )                 V = ( v1 v1  1       ) */
+/* >                   ( v1 v2 v3 )                     ( v2 v2 v2  1    ) */
+/* >                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 ) */
+/* >                   (     1 v3 ) */
+/* >                   (        1 ) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dlarft_lvl2__(char *direct, char *storev, integer *n, 
+	integer *k, doublereal *v, integer *ldv, doublereal *tau, doublereal *
+	t, integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    integer i__, j, prevlastv;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *);
+    integer lastv;
+    extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), mecago_();
+
+
+/*  -- LAPACK auxiliary routine -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/*  ===================================================================== */
+
+
+/*     Quick return if possible */
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	prevlastv = *n;
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    prevlastv = f2cmax(i__,prevlastv);
+	    if (tau[i__] == 0.) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    t[j + i__ * t_dim1] = 0.;
+		}
+	    } else {
+
+/*              general case */
+
+		if (lsame_(storev, "C")) {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			if (v[lastv + i__ * v_dim1] != 0.) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
+
+		    i__2 = j - i__;
+		    i__3 = i__ - 1;
+		    d__1 = -tau[i__];
+		    dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 + 
+			    v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
+			    c_b6, &t[i__ * t_dim1 + 1], &c__1);
+		} else {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			if (v[i__ + lastv * v_dim1] != 0.) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
+
+		    i__2 = i__ - 1;
+		    i__3 = j - i__;
+		    d__1 = -tau[i__];
+		    dgemv_("No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) *
+			     v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
+			    ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
+		}
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		t[i__ + i__ * t_dim1] = tau[i__];
+		if (i__ > 1) {
+		    prevlastv = f2cmax(prevlastv,lastv);
+		} else {
+		    prevlastv = lastv;
+		}
+	    }
+	}
+    } else {
+	prevlastv = 1;
+	for (i__ = *k; i__ >= 1; --i__) {
+	    if (tau[i__] == 0.) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    t[j + i__ * t_dim1] = 0.;
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    if (v[lastv + i__ * v_dim1] != 0.) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ 
+				    + j * v_dim1];
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
+
+			i__1 = *n - *k + i__ - j;
+			i__2 = *k - i__;
+			d__1 = -tau[i__];
+			dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ 
+				+ 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
+				c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
+				c__1);
+		    } else {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    if (v[i__ + lastv * v_dim1] != 0.) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k 
+				    + i__) * v_dim1];
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
+
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__ - j;
+			d__1 = -tau[i__];
+			dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + 
+				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
+				ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		    if (i__ > 1) {
+			prevlastv = f2cmin(prevlastv,lastv);
+		    } else {
+			prevlastv = lastv;
+		    }
+		}
+		t[i__ + i__ * t_dim1] = tau[i__];
+	    }
+	}
+    }
+    return 0;
+
+/*     End of DLARFT_LVL2 */
+
+} /* dlarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/dlarft_lvl2.f b/lapack-netlib/SRC/dlarft_lvl2.f
new file mode 100644
index 0000000000..9614df466d
--- /dev/null
+++ b/lapack-netlib/SRC/dlarft_lvl2.f
@@ -0,0 +1,326 @@
+*> \brief \b DLARFT_LVL2: Level 2 BLAS version for terminating case of DLARFT.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> Download DLARFT_LVL2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlarft.f">
+*> [TXT]</a>
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+*                    T, LDT )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          DIRECT, STOREV
+*       INTEGER            K, LDT, LDV, N
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DLARFT_LVL2 forms the triangular factor T of a real block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*>    H  =  I - V * T * V**T
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*>    H  =  I - V**T * T * V
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*>          DIRECT is CHARACTER*1
+*>          Specifies the order in which the elementary reflectors are
+*>          multiplied to form the block reflector:
+*>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*>          STOREV is CHARACTER*1
+*>          Specifies how the vectors which define the elementary
+*>          reflectors are stored (see also Further Details):
+*>          = 'C': columnwise
+*>          = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The order of the triangular factor T (= the number of
+*>          elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*>          V is DOUBLE PRECISION array, dimension
+*>                               (LDV,K) if STOREV = 'C'
+*>                               (LDV,N) if STOREV = 'R'
+*>          The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V.
+*>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*>          TAU is DOUBLE PRECISION array, dimension (K)
+*>          TAU(i) must contain the scalar factor of the elementary
+*>          reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*>          T is DOUBLE PRECISION array, dimension (LDT,K)
+*>          The k by k triangular factor T of the block reflector.
+*>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*>          lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  The shape of the matrix V and the storage of the vectors which define
+*>  the H(i) is best illustrated by the following example with n = 5 and
+*>  k = 3. The elements equal to 1 are not stored.
+*>
+*>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*>
+*>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*>                   ( v1  1    )                     (     1 v2 v2 v2 )
+*>                   ( v1 v2  1 )                     (        1 v3 v3 )
+*>                   ( v1 v2 v3 )
+*>                   ( v1 v2 v3 )
+*>
+*>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*>
+*>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*>                   (     1 v3 )
+*>                   (        1 )
+*> \endverbatim
+*>
+*  =====================================================================
+      SUBROUTINE DLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+     $            T, LDT )
+*
+*  -- LAPACK auxiliary routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, PREVLASTV, LASTV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         PREVLASTV = N
+         DO I = 1, K
+            PREVLASTV = MAX( I, PREVLASTV )
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = 1, I
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( LASTV, I ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( I , J )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
+*
+                  CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ),
+     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
+     $                        T( 1, I ), 1 )
+               ELSE
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( I, LASTV ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( J , I )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
+*
+                  CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ),
+     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE,
+     $                        T( 1, I ), 1 )
+               END IF
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+     $                     T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+               IF( I.GT.1 ) THEN
+                  PREVLASTV = MAX( PREVLASTV, LASTV )
+               ELSE
+                  PREVLASTV = LASTV
+               END IF
+            END IF
+         END DO
+      ELSE
+         PREVLASTV = 1
+         DO I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = I, K
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( LASTV, I ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( N-K+I , J )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
+*
+                     CALL DGEMV( 'Transpose', N-K+I-J, K-I,
+     $                           -TAU( I ),
+     $                           V( J, I+1 ), LDV, V( J, I ), 1, ONE,
+     $                           T( I+1, I ), 1 )
+                  ELSE
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( I, LASTV ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
+*
+                     CALL DGEMV( 'No transpose', K-I, N-K+I-J,
+     $                    -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
+     $                    ONE, T( I+1, I ), 1 )
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL DTRMV( 'Lower', 'No transpose', 'Non-unit',
+     $                        K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+                  IF( I.GT.1 ) THEN
+                     PREVLASTV = MIN( PREVLASTV, LASTV )
+                  ELSE
+                     PREVLASTV = LASTV
+                  END IF
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of DLARFT_LVL2
+*
+      END
diff --git a/lapack-netlib/SRC/ilaenv.f b/lapack-netlib/SRC/ilaenv.f
index e74a2b35ec..e108108d7f 100644
--- a/lapack-netlib/SRC/ilaenv.f
+++ b/lapack-netlib/SRC/ilaenv.f
@@ -5,7 +5,6 @@
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
-*> \htmlonly
 *> Download ILAENV + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilaenv.f">
 *> [TGZ]</a>
@@ -13,7 +12,6 @@
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilaenv.f">
 *> [TXT]</a>
-*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -159,6 +157,7 @@
 *>
 *  =====================================================================
       INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
+      IMPLICIT NONE
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@@ -667,6 +666,10 @@ INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 )
          IF( C3.EQ.'HD3' ) THEN
             NX = 128
          END IF
+      ELSE IF( C2.EQ.'LA' ) THEN
+         IF( C3.EQ.'RFT' ) THEN
+            NX = 64
+         END IF
       END IF
       ILAENV = NX
       RETURN
diff --git a/lapack-netlib/SRC/slarft.c b/lapack-netlib/SRC/slarft.c
index 6f8350b218..34445172b1 100644
--- a/lapack-netlib/SRC/slarft.c
+++ b/lapack-netlib/SRC/slarft.c
@@ -190,8 +190,8 @@ typedef struct Namelist Namelist;
 #define c_abs(z) (cabsf(Cf(z)))
 #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
 #ifdef _MSC_VER
-#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
-#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
 #else
 #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
 #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -378,19 +377,24 @@ static integer smaxloc_(float *w, integer s, integer e, integer *n)
 		if (w[i-1]>m) mi=i ,m=w[i-1];
 	return mi-s+1;
 }
+
 static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
 	integer n = *n_, incx = *incx_, incy = *incy_, i;
 #ifdef _MSC_VER
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
-			zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				+ Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i])._Val[1] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
-			zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				+ Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				- Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
@@ -415,13 +419,17 @@ static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
-			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
+				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
-			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
+				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
@@ -446,13 +454,17 @@ static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, c
 	_Fcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
-			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]
+				+ Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
-			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				- Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				+ Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
 		}
 	}
 	pCf(z) = zdotc;
@@ -477,13 +489,17 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
-			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
+				- Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]
+				+ Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
-			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+				- Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]
+				+ Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
 		}
 	}
 	pCd(z) = zdotc;
@@ -508,12 +524,13 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 */
 
 
-
-
 /* Table of constant values */
 
-static integer c__1 = 1;
-static real c_b6 = 1.f;
+static integer c__3 = 3;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+static real c_b13 = 1.f;
+static real c_b22 = -1.f;
 
 /* > \brief \b SLARFT forms the triangular factor T of a block reflector H = I - vtvH */
 
@@ -522,7 +539,6 @@ static real c_b6 = 1.f;
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 
-/* > \htmlonly */
 /* > Download SLARFT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarft.
 f"> */
@@ -533,12 +549,11 @@ f"> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarft.
 f"> */
 /* > [TXT]</a> */
-/* > \endhtmlonly */
 
 /*  Definition: */
 /*  =========== */
 
-/*       SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/*        SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
 
 /*       CHARACTER          DIRECT, STOREV */
 /*       INTEGER            K, LDT, LDV, N */
@@ -643,12 +658,10 @@ f"> */
 
 /* > \author Univ. of Tennessee */
 /* > \author Univ. of California Berkeley */
-/* > \author Univ. of Colorado Denver */
+/* > \author Johnathan Rhyne, Univ. of Colorado Denver (original author, 2024) */
 /* > \author NAG Ltd. */
 
-/* > \date December 2016 */
-
-/* > \ingroup realOTHERauxiliary */
+/* > \ingroup larft */
 
 /* > \par Further Details: */
 /*  ===================== */
@@ -677,211 +690,533 @@ f"> */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
-/* Subroutine */ void slarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer *
 	k, real *v, integer *ldv, real *tau, real *t, integer *ldt)
 {
     /* System generated locals */
-    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
-    real r__1;
+    address a__1[2];
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1[2], i__2, i__3;
+    char ch__1[2];
 
     /* Local variables */
-    integer i__, j;
+    integer i__, j, l;
+    logical lq, ql, qr;
+    integer nx;
+    extern /* Subroutine */ int slarft_lvl2__(char *, char *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *);
+    logical dirf, colv;
     extern logical lsame_(char *, char *);
-    extern /* Subroutine */ void sgemv_(char *, integer *, integer *, real *, 
-	    real *, integer *, real *, integer *, real *, real *, integer *);
-    integer lastv;
-    extern /* Subroutine */ void strmv_(char *, char *, char *, integer *, 
-	    real *, integer *, real *, integer *);
-    integer prevlastv;
-    extern /* Subroutine */ void mecago_();
-
-
-/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), strmm_(char *, char *, char *,
+	     char *, integer *, integer *, real *, real *, integer *, real *, 
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *);
+
+
+/*  -- LAPACK auxiliary routine -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/*     December 2016 */
 
 
-/*  ===================================================================== */
 
 
+
+
+
+
+
+
+
+
+/*     The general scheme used is inspired by the approach inside DGEQRT3 */
+/*     which was (at the time of writing this code): */
+/*     Based on the algorithm of Elmroth and Gustavson, */
+/*     IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
 /*     Quick return if possible */
 
     /* Parameter adjustments */
     v_dim1 = *ldv;
-    v_offset = 1 + v_dim1 * 1;
+    v_offset = 1 + v_dim1;
     v -= v_offset;
     --tau;
     t_dim1 = *ldt;
-    t_offset = 1 + t_dim1 * 1;
+    t_offset = 1 + t_dim1;
     t -= t_offset;
 
     /* Function Body */
-    if (*n == 0) {
-	return;
+    if (*n == 0 || *k == 0) {
+	return 0;
     }
 
-    if (lsame_(direct, "F")) {
-	prevlastv = *n;
-	i__1 = *k;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    prevlastv = f2cmax(i__,prevlastv);
-	    if (tau[i__] == 0.f) {
+/*     Base case */
 
-/*              H(i)  =  I */
+    if (*n == 1 || *k == 1) {
+	t[t_dim1 + 1] = tau[1];
+	return 0;
+    }
 
-		i__2 = i__;
-		for (j = 1; j <= i__2; ++j) {
-		    t[j + i__ * t_dim1] = 0.f;
-		}
-	    } else {
-
-/*              general case */
-
-		if (lsame_(storev, "C")) {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			if (v[lastv + i__ * v_dim1] != 0.f) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
-
-		    i__2 = j - i__;
-		    i__3 = i__ - 1;
-		    r__1 = -tau[i__];
-		    sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + 1 + 
-			    v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
-			    c_b6, &t[i__ * t_dim1 + 1], &c__1);
-		} else {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			if (v[i__ + lastv * v_dim1] != 0.f) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
-
-		    i__2 = i__ - 1;
-		    i__3 = j - i__;
-		    r__1 = -tau[i__];
-		    sgemv_("No transpose", &i__2, &i__3, &r__1, &v[(i__ + 1) *
-			     v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
-			    ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
-		}
+/*     Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+    i__1[0] = 1, a__1[0] = direct;
+    i__1[1] = 1, a__1[1] = storev;
+    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+    nx = ilaenv_(&c__3, "SLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (*k < nx) {
+	slarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+	return 0;
+    }
 
-/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+/*     Beginning of executable statements */
 
-		i__2 = i__ - 1;
-		strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
-			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
-		t[i__ + i__ * t_dim1] = tau[i__];
-		if (i__ > 1) {
-		    prevlastv = f2cmax(prevlastv,lastv);
-		} else {
-		    prevlastv = lastv;
-		}
+    l = *k / 2;
+
+/*     Determine what kind of Q we need to compute */
+/*     We assume that if the user doesn't provide 'F' for DIRECT, */
+/*     then they meant to provide 'B' and if they don't provide */
+/*     'C' for STOREV, then they meant to provide 'R' */
+
+    dirf = lsame_(direct, "F");
+    colv = lsame_(storev, "C");
+
+/*     QR happens when we have forward direction in column storage */
+
+    qr = dirf && colv;
+
+/*     LQ happens when we have forward direction in row storage */
+
+    lq = dirf && ! colv;
+
+/*     QL happens when we have backward direction in column storage */
+
+    ql = ! dirf && colv;
+
+/*     The last case is RQ. Due to how we structured this, if the */
+/*     above 3 are false, then RQ must be true, so we never store */
+/*     this */
+/*     RQ happens when we have backward direction in row storage */
+/*     RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+    if (qr) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} 0      | */
+/*            |V_{2,1} V_{2,2}| */
+/*            |V_{3,1} V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\R^{l,l}      unit lower triangular */
+/*        V_{2,1}\in\R^{k-l,l}    rectangular */
+/*        V_{3,1}\in\R^{n-k,l}    rectangular */
+
+/*        V_{2,2}\in\R^{k-l,k-l}  unit lower triangular */
+/*        V_{3,2}\in\R^{n-k,k-l}  rectangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{l, l}     upper triangular */
+/*        T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\R^{l, k-l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/*        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	slarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	slarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{2,1}' */
+
+	i__2 = l;
+	for (j = 1; j <= i__2; ++j) {
+	    i__3 = *k - l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		t[j + (l + i__) * t_dim1] = v[l + i__ + j * v_dim1];
 	    }
 	}
-    } else {
-	prevlastv = 1;
-	for (i__ = *k; i__ >= 1; --i__) {
-	    if (tau[i__] == 0.f) {
 
-/*              H(i)  =  I */
+/*        T_{1,2} = T_{1,2}*V_{2,2} */
 
-		i__1 = *k;
-		for (j = i__; j <= i__1; ++j) {
-		    t[j + i__ * t_dim1] = 0.f;
-		}
-	    } else {
-
-/*              general case */
-
-		if (i__ < *k) {
-		    if (lsame_(storev, "C")) {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    if (v[lastv + i__ * v_dim1] != 0.f) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ 
-				    + j * v_dim1];
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
-
-			i__1 = *n - *k + i__ - j;
-			i__2 = *k - i__;
-			r__1 = -tau[i__];
-			sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__ 
-				+ 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
-				c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
-				c__1);
-		    } else {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    if (v[i__ + lastv * v_dim1] != 0.f) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k 
-				    + i__) * v_dim1];
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
-
-			i__1 = *k - i__;
-			i__2 = *n - *k + i__ - j;
-			r__1 = -tau[i__];
-			sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + 
-				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
-				ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
-		    }
-
-/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
-		    i__1 = *k - i__;
-		    strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
-			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
-			     t_dim1], &c__1)
-			    ;
-		    if (i__ > 1) {
-			prevlastv = f2cmin(prevlastv,lastv);
-		    } else {
-			prevlastv = lastv;
-		    }
-		}
-		t[i__ + i__ * t_dim1] = tau[i__];
+	i__2 = *k - l;
+	strmm_("Right", "Lower", "No transpose", "Unit", &l, &i__2, &c_b13, &
+		v[l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], 
+		ldt);
+
+/*        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	sgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[*k + 
+		1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b13, 
+		&t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1'*V_2 */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__2 = *k - l;
+	strmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+		 &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__2 = *k - l;
+	strmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+		c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 
+		+ 1], ldt);
+    } else if (lq) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |----------------------| */
+/*            |V_{1,1} V_{1,2} V{1,3}| */
+/*            |0       V_{2,2} V{2,3}| */
+/*            |----------------------| */
+
+/*        V_{1,1}\in\R^{l,l}      unit upper triangular */
+/*        V_{1,2}\in\R^{l,k-l}    rectangular */
+/*        V_{1,3}\in\R^{l,n-k}    rectangular */
+
+/*        V_{2,2}\in\R^{k-l,k-l}  unit upper triangular */
+/*        V_{2,3}\in\R^{k-l,n-k}  rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{l, l}     upper triangular */
+/*        T_{2,2}\in\R^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\R^{l, k-l}   rectangular */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/*        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	slarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	slarft_(direct, storev, &i__2, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{1,2} */
+
+	i__2 = *k - l;
+	slacpy_("All", &l, &i__2, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * 
+		t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*V_{2,2}' */
+
+	i__2 = *k - l;
+	strmm_("Right", "Upper", "Transpose", "Unit", &l, &i__2, &c_b13, &v[l 
+		+ 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	sgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[(*k 
+		+ 1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+		c_b13, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1*V_2' */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__2 = *k - l;
+	strmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+		 &t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__2 = *k - l;
+	strmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__2, &
+		c_b13, &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 
+		+ 1], ldt);
+    } else if (ql) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} V_{1,2}| */
+/*            |V_{2,1} V_{2,2}| */
+/*            |0       V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\R^{n-k,k-l}  rectangular */
+/*        V_{2,1}\in\R^{k-l,k-l}  unit upper triangular */
+
+/*        V_{1,2}\in\R^{n-k,l}    rectangular */
+/*        V_{2,2}\in\R^{k-l,l}    rectangular */
+/*        V_{3,2}\in\R^{l,l}      unit upper triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\R^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\R^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/*        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	slarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	slarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+		tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2}' */
+
+	i__2 = *k - l;
+	for (j = 1; j <= i__2; ++j) {
+	    i__3 = l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		t[*k - l + i__ + j * t_dim1] = v[*n - *k + j + (*k - l + i__) 
+			* v_dim1];
 	    }
 	}
-    }
-    return;
 
-/*     End of SLARFT */
+/*        T_{2,1} = T_{2,1}*V_{2,1} */
+
+	i__2 = *k - l;
+	strmm_("Right", "Upper", "No transpose", "Unit", &l, &i__2, &c_b13, &
+		v[*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
 
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	sgemm_("Transpose", "No transpose", &l, &i__2, &i__3, &c_b13, &v[(*k 
+		- l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b13, &t[*k 
+		- l + 1 + t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2'*V_1 */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__2 = *k - l;
+	strmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__2, &c_b22,
+		 &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__2 = *k - l;
+	strmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__2, &
+		c_b13, &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    } else {
+
+/*        Else means RQ case */
+
+/*        Break V apart into 6 components */
+
+/*        V = |-----------------------| */
+/*            |V_{1,1} V_{1,2} 0      | */
+/*            |V_{2,1} V_{2,2} V_{2,3}| */
+/*            |-----------------------| */
+
+/*        V_{1,1}\in\R^{k-l,n-k}  rectangular */
+/*        V_{1,2}\in\R^{k-l,k-l}  unit lower triangular */
+
+/*        V_{2,1}\in\R^{l,n-k}    rectangular */
+/*        V_{2,2}\in\R^{l,k-l}    rectangular */
+/*        V_{2,3}\in\R^{l,l}      unit lower triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\R^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\R^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/*        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'TV */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__2 = *n - l;
+	i__3 = *k - l;
+	slarft_(direct, storev, &i__2, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	slarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k 
+		- l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2} */
+
+	i__2 = *k - l;
+	slacpy_("All", &l, &i__2, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], 
+		ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*V_{1,2}' */
+
+	i__2 = *k - l;
+	strmm_("Right", "Lower", "Transpose", "Unit", &l, &i__2, &c_b13, &v[(*
+		n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__2 = *k - l;
+	i__3 = *n - *k;
+	sgemm_("No transpose", "Transpose", &l, &i__2, &i__3, &c_b13, &v[*k - 
+		l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b13, &t[*k - l + 
+		1 + t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2*V_1' */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__2 = *k - l;
+	strmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b22, 
+		&t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__2 = *k - l;
+	strmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__2, &c_b13,
+		 &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    }
+    return 0;
 } /* slarft_ */
 
diff --git a/lapack-netlib/SRC/slarft.f b/lapack-netlib/SRC/slarft.f
index ad3a4d924c..3e0eac751e 100644
--- a/lapack-netlib/SRC/slarft.f
+++ b/lapack-netlib/SRC/slarft.f
@@ -5,7 +5,6 @@
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
-*> \htmlonly
 *> Download SLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarft.f">
 *> [TGZ]</a>
@@ -13,7 +12,6 @@
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarft.f">
 *> [TXT]</a>
-*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -161,6 +159,7 @@
 *  =====================================================================
       RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
      $                             TAU, T, LDT )
+      IMPLICIT NONE
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@@ -183,7 +182,7 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
 *
 *     .. Local Scalars ..
 *
-      INTEGER            I,J,L
+      INTEGER            I,J,L,NX
       LOGICAL            QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
@@ -193,7 +192,8 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
 *     .. External Functions..
 *
       LOGICAL            LSAME
-      EXTERNAL           LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
 *     
 *     The general scheme used is inspired by the approach inside DGEQRT3
 *     which was (at the time of writing this code):
@@ -215,6 +215,14 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV,
          RETURN
       END IF
 *
+*     Determine when to cross over into the level 2 based implementation
+*
+      NX = ILAENV(3, "SLARFT", DIRECT // STOREV, N, K, -1, -1)
+      IF(K.LT.NX) THEN
+         CALL SLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+         RETURN
+      END IF
+*
 *     Beginning of executable statements
 *
       L = K / 2
diff --git a/lapack-netlib/SRC/slarft_lvl2.c b/lapack-netlib/SRC/slarft_lvl2.c
new file mode 100644
index 0000000000..d1b8f31b65
--- /dev/null
+++ b/lapack-netlib/SRC/slarft_lvl2.c
@@ -0,0 +1,896 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				+ Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i])._Val[1] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				+ Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				- Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
+				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
+				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]
+				+ Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				- Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				+ Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
+				- Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]
+				+ Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+				- Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]
+				+ Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b6 = 1.f;
+
+/* > \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download SLARFT_LVL2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarft.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slarft.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarft.
+f"> */
+/* > [TXT]</a> */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/*                    T, LDT ) */
+
+/*       CHARACTER          DIRECT, STOREV */
+/*       INTEGER            K, LDT, LDV, N */
+/*       REAL               T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLARFT_LVL2 forms the triangular factor T of a real block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* >    H  =  I - V * T * V**T */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* >    H  =  I - V**T * T * V */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* >          DIRECT is CHARACTER*1 */
+/* >          Specifies the order in which the elementary reflectors are */
+/* >          multiplied to form the block reflector: */
+/* >          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* >          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* >          STOREV is CHARACTER*1 */
+/* >          Specifies how the vectors which define the elementary */
+/* >          reflectors are stored (see also Further Details): */
+/* >          = 'C': columnwise */
+/* >          = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The order of the triangular factor T (= the number of */
+/* >          elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension */
+/* >                               (LDV,K) if STOREV = 'C' */
+/* >                               (LDV,N) if STOREV = 'R' */
+/* >          The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (K) */
+/* >          TAU(i) must contain the scalar factor of the elementary */
+/* >          reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,K) */
+/* >          The k by k triangular factor T of the block reflector. */
+/* >          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* >          lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The shape of the matrix V and the storage of the vectors which define */
+/* >  the H(i) is best illustrated by the following example with n = 5 and */
+/* >  k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* >  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* >               V = (  1       )                 V = (  1 v1 v1 v1 v1 ) */
+/* >                   ( v1  1    )                     (     1 v2 v2 v2 ) */
+/* >                   ( v1 v2  1 )                     (        1 v3 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* >               V = ( v1 v2 v3 )                 V = ( v1 v1  1       ) */
+/* >                   ( v1 v2 v3 )                     ( v2 v2 v2  1    ) */
+/* >                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 ) */
+/* >                   (     1 v3 ) */
+/* >                   (        1 ) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int slarft_lvl2__(char *direct, char *storev, integer *n, 
+	integer *k, real *v, integer *ldv, real *tau, real *t, integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Local variables */
+    integer i__, j, prevlastv;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    integer lastv;
+    extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, 
+	    real *, integer *, real *, integer *), 
+	    mecago_();
+
+
+/*  -- LAPACK auxiliary routine -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/*  ===================================================================== */
+
+
+/*     Quick return if possible */
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	prevlastv = *n;
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    prevlastv = f2cmax(i__,prevlastv);
+	    if (tau[i__] == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    t[j + i__ * t_dim1] = 0.f;
+		}
+	    } else {
+
+/*              general case */
+
+		if (lsame_(storev, "C")) {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			if (v[lastv + i__ * v_dim1] != 0.f) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1];
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */
+
+		    i__2 = j - i__;
+		    i__3 = i__ - 1;
+		    r__1 = -tau[i__];
+		    sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + 1 + 
+			    v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, &
+			    c_b6, &t[i__ * t_dim1 + 1], &c__1);
+		} else {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			if (v[i__ + lastv * v_dim1] != 0.f) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1];
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */
+
+		    i__2 = i__ - 1;
+		    i__3 = j - i__;
+		    r__1 = -tau[i__];
+		    sgemv_("No transpose", &i__2, &i__3, &r__1, &v[(i__ + 1) *
+			     v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
+			    ldv, &c_b6, &t[i__ * t_dim1 + 1], &c__1);
+		}
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		t[i__ + i__ * t_dim1] = tau[i__];
+		if (i__ > 1) {
+		    prevlastv = f2cmax(prevlastv,lastv);
+		} else {
+		    prevlastv = lastv;
+		}
+	    }
+	}
+    } else {
+	prevlastv = 1;
+	for (i__ = *k; i__ >= 1; --i__) {
+	    if (tau[i__] == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    t[j + i__ * t_dim1] = 0.f;
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    if (v[lastv + i__ * v_dim1] != 0.f) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ 
+				    + j * v_dim1];
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */
+
+			i__1 = *n - *k + i__ - j;
+			i__2 = *k - i__;
+			r__1 = -tau[i__];
+			sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__ 
+				+ 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
+				c__1, &c_b6, &t[i__ + 1 + i__ * t_dim1], &
+				c__1);
+		    } else {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    if (v[i__ + lastv * v_dim1] != 0.f) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k 
+				    + i__) * v_dim1];
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */
+
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__ - j;
+			r__1 = -tau[i__];
+			sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + 
+				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
+				ldv, &c_b6, &t[i__ + 1 + i__ * t_dim1], &c__1);
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		    if (i__ > 1) {
+			prevlastv = f2cmin(prevlastv,lastv);
+		    } else {
+			prevlastv = lastv;
+		    }
+		}
+		t[i__ + i__ * t_dim1] = tau[i__];
+	    }
+	}
+    }
+    return 0;
+
+/*     End of SLARFT_LVL2 */
+
+} /* slarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/slarft_lvl2.f b/lapack-netlib/SRC/slarft_lvl2.f
new file mode 100644
index 0000000000..7107a91d5d
--- /dev/null
+++ b/lapack-netlib/SRC/slarft_lvl2.f
@@ -0,0 +1,326 @@
+*> \brief \b SLARFT_LVL2: Level 2 BLAS version for terminating case of SLARFT.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> Download SLARFT_LVL2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slarft.f">
+*> [TXT]</a>
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+*                    T, LDT )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          DIRECT, STOREV
+*       INTEGER            K, LDT, LDV, N
+*       ..
+*       .. Array Arguments ..
+*       REAL               T( LDT, * ), TAU( * ), V( LDV, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> SLARFT_LVL2 forms the triangular factor T of a real block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*>    H  =  I - V * T * V**T
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*>    H  =  I - V**T * T * V
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*>          DIRECT is CHARACTER*1
+*>          Specifies the order in which the elementary reflectors are
+*>          multiplied to form the block reflector:
+*>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*>          STOREV is CHARACTER*1
+*>          Specifies how the vectors which define the elementary
+*>          reflectors are stored (see also Further Details):
+*>          = 'C': columnwise
+*>          = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The order of the triangular factor T (= the number of
+*>          elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*>          V is REAL array, dimension
+*>                               (LDV,K) if STOREV = 'C'
+*>                               (LDV,N) if STOREV = 'R'
+*>          The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V.
+*>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*>          TAU is REAL array, dimension (K)
+*>          TAU(i) must contain the scalar factor of the elementary
+*>          reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*>          T is REAL array, dimension (LDT,K)
+*>          The k by k triangular factor T of the block reflector.
+*>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*>          lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  The shape of the matrix V and the storage of the vectors which define
+*>  the H(i) is best illustrated by the following example with n = 5 and
+*>  k = 3. The elements equal to 1 are not stored.
+*>
+*>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*>
+*>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*>                   ( v1  1    )                     (     1 v2 v2 v2 )
+*>                   ( v1 v2  1 )                     (        1 v3 v3 )
+*>                   ( v1 v2 v3 )
+*>                   ( v1 v2 v3 )
+*>
+*>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*>
+*>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*>                   (     1 v3 )
+*>                   (        1 )
+*> \endverbatim
+*>
+*  =====================================================================
+      SUBROUTINE SLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+     $            T, LDT )
+*
+*  -- LAPACK auxiliary routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      REAL               T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, PREVLASTV, LASTV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, STRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         PREVLASTV = N
+         DO I = 1, K
+            PREVLASTV = MAX( I, PREVLASTV )
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = 1, I
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( LASTV, I ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( I , J )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
+*
+                  CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ),
+     $                        V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE,
+     $                        T( 1, I ), 1 )
+               ELSE
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( I, LASTV ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( J , I )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
+*
+                  CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ),
+     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+     $                        ONE, T( 1, I ), 1 )
+               END IF
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+     $                     T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+               IF( I.GT.1 ) THEN
+                  PREVLASTV = MAX( PREVLASTV, LASTV )
+               ELSE
+                  PREVLASTV = LASTV
+               END IF
+            END IF
+         END DO
+      ELSE
+         PREVLASTV = 1
+         DO I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = I, K
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( LASTV, I ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( N-K+I , J )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
+*
+                     CALL SGEMV( 'Transpose', N-K+I-J, K-I,
+     $                           -TAU( I ),
+     $                           V( J, I+1 ), LDV, V( J, I ), 1, ONE,
+     $                           T( I+1, I ), 1 )
+                  ELSE
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( I, LASTV ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
+*
+                     CALL SGEMV( 'No transpose', K-I, N-K+I-J,
+     $                    -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
+     $                    ONE, T( I+1, I ), 1 )
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL STRMV( 'Lower', 'No transpose', 'Non-unit',
+     $                        K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+                  IF( I.GT.1 ) THEN
+                     PREVLASTV = MIN( PREVLASTV, LASTV )
+                  ELSE
+                     PREVLASTV = LASTV
+                  END IF
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of SLARFT_LVL2
+*
+      END
diff --git a/lapack-netlib/SRC/zlarft.c b/lapack-netlib/SRC/zlarft.c
index f8ca30b318..404f96358c 100644
--- a/lapack-netlib/SRC/zlarft.c
+++ b/lapack-netlib/SRC/zlarft.c
@@ -247,7 +247,6 @@ typedef struct Namelist Namelist;
 #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
 #define sig_die(s, kill) { exit(1); }
 #define s_stop(s, n) {exit(0);}
-static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
 #define z_abs(z) (cabs(Cd(z)))
 #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
 #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
@@ -420,16 +419,16 @@ static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 	_Dcomplex zdotc = {0.0, 0.0};
 	if (incx == 1 && incy == 1) {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
-				+ Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
-			zdotc._Val[1] -= Cd(&x[i])._Val[1] * Cd(&y[i])._Val[0]
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
 				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
 		}
 	} else {
 		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
-			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
 				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
-			zdotc._Val[1] -= Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[0]
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
 				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
 		}
 	}
@@ -525,12 +524,13 @@ static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integ
 */
 
 
-
-
 /* Table of constant values */
 
 static doublecomplex c_b1 = {1.,0.};
-static integer c__1 = 1;
+static doublecomplex c_b3 = {-1.,0.};
+static integer c__3 = 3;
+static integer c_n1 = -1;
+static integer c__2 = 2;
 
 /* > \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH */
 
@@ -539,7 +539,6 @@ static integer c__1 = 1;
 /* Online html documentation available at */
 /*            http://www.netlib.org/lapack/explore-html/ */
 
-/* > \htmlonly */
 /* > Download ZLARFT + dependencies */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.
 f"> */
@@ -550,12 +549,11 @@ f"> */
 /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.
 f"> */
 /* > [TXT]</a> */
-/* > \endhtmlonly */
 
 /*  Definition: */
 /*  =========== */
 
-/*       SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
+/*        SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */
 
 /*       CHARACTER          DIRECT, STOREV */
 /*       INTEGER            K, LDT, LDV, N */
@@ -663,9 +661,7 @@ f"> */
 /* > \author Univ. of Colorado Denver */
 /* > \author NAG Ltd. */
 
-/* > \date June 2016 */
-
-/* > \ingroup complex16OTHERauxiliary */
+/* > \ingroup larft */
 
 /* > \par Further Details: */
 /*  ===================== */
@@ -694,257 +690,542 @@ f"> */
 /* > \endverbatim */
 /* > */
 /*  ===================================================================== */
-/* Subroutine */ void zlarft_(char *direct, char *storev, integer *n, integer *
+/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer *
 	k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex *
 	t, integer *ldt)
 {
     /* System generated locals */
-    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
+    address a__1[2];
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2[2], i__3, i__4;
+    doublecomplex z__1;
+    char ch__1[2];
 
     /* Local variables */
-    integer i__, j;
+    integer i__, j, l;
+    logical lq, ql, qr;
+    integer nx;
+    extern /* Subroutine */ int zlarft_lvl2__(char *, char *, integer *, 
+	    integer *, doublecomplex *, integer *, doublecomplex *, 
+	    doublecomplex *, integer *);
+    logical dirf, colv;
     extern logical lsame_(char *, char *);
-    extern /* Subroutine */ void zgemm_(char *, char *, integer *, integer *, 
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
 	    integer *, doublecomplex *, doublecomplex *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
-	    integer *), zgemv_(char *, integer *, integer *, 
-	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
-	    integer *, doublecomplex *, doublecomplex *, integer *);
-    integer lastv;
-    extern /* Subroutine */ void ztrmv_(char *, char *, char *, integer *, 
+	    integer *), ztrmm_(char *, char *, char *, char *,
+	     integer *, integer *, doublecomplex *, doublecomplex *, integer *
+	    , doublecomplex *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
 	    doublecomplex *, integer *, doublecomplex *, integer *);
-    integer prevlastv;
-    extern /* Subroutine */ void mecago_();
 
 
-/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+/*  -- LAPACK auxiliary routine -- */
 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
-/*     June 2016 */
 
 
-/*  ===================================================================== */
 
 
+
+
+
+
+
+
+
+
+
+
+/*     The general scheme used is inspired by the approach inside DGEQRT3 */
+/*     which was (at the time of writing this code): */
+/*     Based on the algorithm of Elmroth and Gustavson, */
+/*     IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+
 /*     Quick return if possible */
 
     /* Parameter adjustments */
     v_dim1 = *ldv;
-    v_offset = 1 + v_dim1 * 1;
+    v_offset = 1 + v_dim1;
     v -= v_offset;
     --tau;
     t_dim1 = *ldt;
-    t_offset = 1 + t_dim1 * 1;
+    t_offset = 1 + t_dim1;
     t -= t_offset;
 
     /* Function Body */
-    if (*n == 0) {
-	return;
+    if (*n == 0 || *k == 0) {
+	return 0;
+    }
+
+/*     Base case */
+
+    if (*n == 1 || *k == 1) {
+	i__1 = t_dim1 + 1;
+	t[i__1].r = tau[1].r, t[i__1].i = tau[1].i;
+	return 0;
     }
 
-    if (lsame_(direct, "F")) {
-	prevlastv = *n;
-	i__1 = *k;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    prevlastv = f2cmax(prevlastv,i__);
-	    i__2 = i__;
-	    if (tau[i__2].r == 0. && tau[i__2].i == 0.) {
+/*     Determine when to cross over into the level 2 based implementation */
+
+/* Writing concatenation */
+    i__2[0] = 1, a__1[0] = direct;
+    i__2[1] = 1, a__1[1] = storev;
+    s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+    nx = ilaenv_(&c__3, "ZLARFT", ch__1, n, k, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (*k < nx) {
+	zlarft_lvl2__(direct, storev, n, k, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+	return 0;
+    }
 
-/*              H(i)  =  I */
+/*     Beginning of executable statements */
 
-		i__2 = i__;
-		for (j = 1; j <= i__2; ++j) {
-		    i__3 = j + i__ * t_dim1;
-		    t[i__3].r = 0., t[i__3].i = 0.;
-		}
-	    } else {
-
-/*              general case */
-
-		if (lsame_(storev, "C")) {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			i__3 = lastv + i__ * v_dim1;
-			if (v[i__3].r != 0. || v[i__3].i != 0.) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			i__3 = j + i__ * t_dim1;
-			i__4 = i__;
-			z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
-			d_cnjg(&z__3, &v[i__ + j * v_dim1]);
-			z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = 
-				z__2.r * z__3.i + z__2.i * z__3.r;
-			t[i__3].r = z__1.r, t[i__3].i = z__1.i;
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
-
-		    i__2 = j - i__;
-		    i__3 = i__ - 1;
-		    i__4 = i__;
-		    z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
-		    zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ 
-			    + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
-			    c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
-		} else {
-/*                 Skip any trailing zeros. */
-		    i__2 = i__ + 1;
-		    for (lastv = *n; lastv >= i__2; --lastv) {
-			i__3 = i__ + lastv * v_dim1;
-			if (v[i__3].r != 0. || v[i__3].i != 0.) {
-			    myexit_();
-			}
-		    }
-		    i__2 = i__ - 1;
-		    for (j = 1; j <= i__2; ++j) {
-			i__3 = j + i__ * t_dim1;
-			i__4 = i__;
-			z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
-			i__5 = j + i__ * v_dim1;
-			z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i, 
-				z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5]
-				.r;
-			t[i__3].r = z__1.r, t[i__3].i = z__1.i;
-		    }
-		    j = f2cmin(lastv,prevlastv);
-
-/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
-
-		    i__2 = i__ - 1;
-		    i__3 = j - i__;
-		    i__4 = i__;
-		    z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
-		    zgemm_("N", "C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1) 
-			    * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
-			    ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
-		}
+    l = *k / 2;
 
-/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
-
-		i__2 = i__ - 1;
-		ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
-			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
-		i__2 = i__ + i__ * t_dim1;
-		i__3 = i__;
-		t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
-		if (i__ > 1) {
-		    prevlastv = f2cmax(prevlastv,lastv);
-		} else {
-		    prevlastv = lastv;
-		}
+/*     Determine what kind of Q we need to compute */
+/*     We assume that if the user doesn't provide 'F' for DIRECT, */
+/*     then they meant to provide 'B' and if they don't provide */
+/*     'C' for STOREV, then they meant to provide 'R' */
+
+    dirf = lsame_(direct, "F");
+    colv = lsame_(storev, "C");
+
+/*     QR happens when we have forward direction in column storage */
+
+    qr = dirf && colv;
+
+/*     LQ happens when we have forward direction in row storage */
+
+    lq = dirf && ! colv;
+
+/*     QL happens when we have backward direction in column storage */
+
+    ql = ! dirf && colv;
+
+/*     The last case is RQ. Due to how we structured this, if the */
+/*     above 3 are false, then RQ must be true, so we never store */
+/*     this */
+/*     RQ happens when we have backward direction in row storage */
+/*     RQ = (.NOT.DIRF).AND.(.NOT.COLV) */
+
+    if (qr) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} 0      | */
+/*            |V_{2,1} V_{2,2}| */
+/*            |V_{3,1} V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\C^{l,l}      unit lower triangular */
+/*        V_{2,1}\in\C^{k-l,l}    rectangular */
+/*        V_{3,1}\in\C^{n-k,l}    rectangular */
+
+/*        V_{2,2}\in\C^{k-l,k-l}  unit lower triangular */
+/*        V_{3,2}\in\C^{n-k,k-l}  rectangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{l, l}     upper triangular */
+/*        T_{2,2}\in\C^{k-l, k-l} upper triangular */
+/*        T_{1,2}\in\C^{l, k-l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') */
+/*        = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	zlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	zlarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{2,1}' */
+
+	i__1 = l;
+	for (j = 1; j <= i__1; ++j) {
+	    i__3 = *k - l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		i__4 = j + (l + i__) * t_dim1;
+		d_cnjg(&z__1, &v[l + i__ + j * v_dim1]);
+		t[i__4].r = z__1.r, t[i__4].i = z__1.i;
 	    }
 	}
-    } else {
-	prevlastv = 1;
-	for (i__ = *k; i__ >= 1; --i__) {
-	    i__1 = i__;
-	    if (tau[i__1].r == 0. && tau[i__1].i == 0.) {
 
-/*              H(i)  =  I */
+/*        T_{1,2} = T_{1,2}*V_{2,2} */
 
-		i__1 = *k;
-		for (j = i__; j <= i__1; ++j) {
-		    i__2 = j + i__ * t_dim1;
-		    t[i__2].r = 0., t[i__2].i = 0.;
-		}
-	    } else {
-
-/*              general case */
-
-		if (i__ < *k) {
-		    if (lsame_(storev, "C")) {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    i__2 = lastv + i__ * v_dim1;
-			    if (v[i__2].r != 0. || v[i__2].i != 0.) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    i__2 = j + i__ * t_dim1;
-			    i__3 = i__;
-			    z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
-			    d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]);
-			    z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, 
-				    z__1.i = z__2.r * z__3.i + z__2.i * 
-				    z__3.r;
-			    t[i__2].r = z__1.r, t[i__2].i = z__1.i;
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
-
-			i__1 = *n - *k + i__ - j;
-			i__2 = *k - i__;
-			i__3 = i__;
-			z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
-			zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[
-				j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * 
-				v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * 
-				t_dim1], &c__1);
-		    } else {
-/*                    Skip any leading zeros. */
-			i__1 = i__ - 1;
-			for (lastv = 1; lastv <= i__1; ++lastv) {
-			    i__2 = i__ + lastv * v_dim1;
-			    if (v[i__2].r != 0. || v[i__2].i != 0.) {
-				myexit_();
-			    }
-			}
-			i__1 = *k;
-			for (j = i__ + 1; j <= i__1; ++j) {
-			    i__2 = j + i__ * t_dim1;
-			    i__3 = i__;
-			    z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
-			    i__4 = j + (*n - *k + i__) * v_dim1;
-			    z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i, 
-				    z__1.i = z__2.r * v[i__4].i + z__2.i * v[
-				    i__4].r;
-			    t[i__2].r = z__1.r, t[i__2].i = z__1.i;
-			}
-			j = f2cmax(lastv,prevlastv);
-
-/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
-
-			i__1 = *k - i__;
-			i__2 = *n - *k + i__ - j;
-			i__3 = i__;
-			z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
-			zgemm_("N", "C", &i__1, &c__1, &i__2, &z__1, &v[i__ + 
-				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
-				ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
-		    }
-
-/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
-
-		    i__1 = *k - i__;
-		    ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
-			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
-			     t_dim1], &c__1)
-			    ;
-		    if (i__ > 1) {
-			prevlastv = f2cmin(prevlastv,lastv);
-		    } else {
-			prevlastv = lastv;
-		    }
-		}
-		i__1 = i__ + i__ * t_dim1;
-		i__2 = i__;
-		t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+	i__1 = *k - l;
+	ztrmm_("Right", "Lower", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+		l + 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	zgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[*k + 
+		1 + v_dim1], ldv, &v[*k + 1 + (l + 1) * v_dim1], ldv, &c_b1, &
+		t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1'*V_2 */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__1 = *k - l;
+	ztrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, 
+		&t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+		 &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], 
+		ldt);
+    } else if (lq) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |----------------------| */
+/*            |V_{1,1} V_{1,2} V{1,3}| */
+/*            |0       V_{2,2} V{2,3}| */
+/*            |----------------------| */
+
+/*        V_{1,1}\in\C^{l,l}      unit upper triangular */
+/*        V_{1,2}\in\C^{l,k-l}    rectangular */
+/*        V_{1,3}\in\C^{l,n-k}    rectangular */
+
+/*        V_{2,2}\in\C^{k-l,k-l}  unit upper triangular */
+/*        V_{2,3}\in\C^{k-l,n-k}  rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} T_{1,2}| */
+/*            |0       T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{l, l}         upper triangular */
+/*        T_{2,2}\in\C^{k-l, k-l}     upper triangular */
+/*        T_{1,2}\in\C^{l, k-l}       rectangular */
+
+/*        Then, consider the product: */
+
+/*        (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) */
+/*        = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 */
+
+/*        Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{1,2} */
+
+/*        Compute T_{1,1} recursively */
+
+	zlarft_(direct, storev, n, &l, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	zlarft_(direct, storev, &i__1, &i__3, &v[l + 1 + (l + 1) * v_dim1], 
+		ldv, &tau[l + 1], &t[l + 1 + (l + 1) * t_dim1], ldt);
+
+/*        Compute T_{1,2} */
+/*        T_{1,2} = V_{1,2} */
+
+	i__1 = *k - l;
+	zlacpy_("All", &l, &i__1, &v[(l + 1) * v_dim1 + 1], ldv, &t[(l + 1) * 
+		t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*V_{2,2}' */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Upper", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[l 
+		+ 1 + (l + 1) * v_dim1], ldv, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	zgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[(*k + 
+		1) * v_dim1 + 1], ldv, &v[l + 1 + (*k + 1) * v_dim1], ldv, &
+		c_b1, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        At this point, we have that T_{1,2} = V_1*V_2' */
+/*        All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} */
+/*        respectively. */
+
+/*        T_{1,2} = -T_{1,1}*T_{1,2} */
+
+	i__1 = *k - l;
+	ztrmm_("Left", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b3, 
+		&t[t_offset], ldt, &t[(l + 1) * t_dim1 + 1], ldt);
+
+/*        T_{1,2} = T_{1,2}*T_{2,2} */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Upper", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+		 &t[l + 1 + (l + 1) * t_dim1], ldt, &t[(l + 1) * t_dim1 + 1], 
+		ldt);
+    } else if (ql) {
+
+/*        Break V apart into 6 components */
+
+/*        V = |---------------| */
+/*            |V_{1,1} V_{1,2}| */
+/*            |V_{2,1} V_{2,2}| */
+/*            |0       V_{3,2}| */
+/*            |---------------| */
+
+/*        V_{1,1}\in\C^{n-k,k-l}  rectangular */
+/*        V_{2,1}\in\C^{k-l,k-l}  unit upper triangular */
+
+/*        V_{1,2}\in\C^{n-k,l}    rectangular */
+/*        V_{2,2}\in\C^{k-l,l}    rectangular */
+/*        V_{3,2}\in\C^{l,l}      unit upper triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\C^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\C^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') */
+/*        = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |-------| */
+/*            |V_1 V_2| */
+/*            |-------| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V*T*V' */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	zlarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	zlarft_(direct, storev, n, &l, &v[(*k - l + 1) * v_dim1 + 1], ldv, &
+		tau[*k - l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2}' */
+
+	i__1 = *k - l;
+	for (j = 1; j <= i__1; ++j) {
+	    i__3 = l;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		i__4 = *k - l + i__ + j * t_dim1;
+		d_cnjg(&z__1, &v[*n - *k + j + (*k - l + i__) * v_dim1]);
+		t[i__4].r = z__1.r, t[i__4].i = z__1.i;
 	    }
 	}
-    }
-    return;
 
-/*     End of ZLARFT */
+/*        T_{2,1} = T_{2,1}*V_{2,1} */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Upper", "No transpose", "Unit", &l, &i__1, &c_b1, &v[
+		*n - *k + 1 + v_dim1], ldv, &t[*k - l + 1 + t_dim1], ldt);
 
+/*        T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	zgemm_("Conjugate", "No transpose", &l, &i__1, &i__3, &c_b1, &v[(*k - 
+		l + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - 
+		l + 1 + t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2'*V_1 */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__1 = *k - l;
+	ztrmm_("Left", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b3, 
+		&t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Lower", "No transpose", "Non-unit", &l, &i__1, &c_b1,
+		 &t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    } else {
+
+/*        Else means RQ case */
+
+/*        Break V apart into 6 components */
+
+/*        V = |-----------------------| */
+/*            |V_{1,1} V_{1,2} 0      | */
+/*            |V_{2,1} V_{2,2} V_{2,3}| */
+/*            |-----------------------| */
+
+/*        V_{1,1}\in\C^{k-l,n-k}  rectangular */
+/*        V_{1,2}\in\C^{k-l,k-l}  unit lower triangular */
+
+/*        V_{2,1}\in\C^{l,n-k}    rectangular */
+/*        V_{2,2}\in\C^{l,k-l}    rectangular */
+/*        V_{2,3}\in\C^{l,l}      unit lower triangular */
+
+/*        We will construct the T matrix */
+/*        T = |---------------| */
+/*            |T_{1,1} 0      | */
+/*            |T_{2,1} T_{2,2}| */
+/*            |---------------| */
+
+/*        T is the triangular factor obtained from block reflectors. */
+/*        To motivate the structure, assume we have already computed T_{1,1} */
+/*        and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 */
+
+/*        T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular */
+/*        T_{2,2}\in\C^{l, l}     non-unit lower triangular */
+/*        T_{2,1}\in\C^{k-l, l}   rectangular */
+
+/*        Where l = floor(k/2) */
+
+/*        Then, consider the product: */
+
+/*        (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) */
+/*        = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 */
+
+/*        Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} */
+
+/*        Then, we can define the matrix V as */
+/*        V = |---| */
+/*            |V_1| */
+/*            |V_2| */
+/*            |---| */
+
+/*        So, our product is equivalent to the matrix product */
+/*        I - V'*T*V */
+/*        This means, we can compute T_{1,1} and T_{2,2}, then use this information */
+/*        to compute T_{2,1} */
+
+/*        Compute T_{1,1} recursively */
+
+	i__1 = *n - l;
+	i__3 = *k - l;
+	zlarft_(direct, storev, &i__1, &i__3, &v[v_offset], ldv, &tau[1], &t[
+		t_offset], ldt);
+
+/*        Compute T_{2,2} recursively */
+
+	zlarft_(direct, storev, n, &l, &v[*k - l + 1 + v_dim1], ldv, &tau[*k 
+		- l + 1], &t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt);
+
+/*        Compute T_{2,1} */
+/*        T_{2,1} = V_{2,2} */
+
+	i__1 = *k - l;
+	zlacpy_("All", &l, &i__1, &v[*k - l + 1 + (*n - *k + 1) * v_dim1], 
+		ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*V_{1,2}' */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Lower", "Conjugate", "Unit", &l, &i__1, &c_b1, &v[(*
+		n - *k + 1) * v_dim1 + 1], ldv, &t[*k - l + 1 + t_dim1], ldt);
+
+/*        T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} */
+/*        Note: We assume K <= N, and GEMM will do nothing if N=K */
+
+	i__1 = *k - l;
+	i__3 = *n - *k;
+	zgemm_("No transpose", "Conjugate", &l, &i__1, &i__3, &c_b1, &v[*k - 
+		l + 1 + v_dim1], ldv, &v[v_offset], ldv, &c_b1, &t[*k - l + 1 
+		+ t_dim1], ldt);
+
+/*        At this point, we have that T_{2,1} = V_2*V_1' */
+/*        All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} */
+/*        respectively. */
+
+/*        T_{2,1} = -T_{2,2}*T_{2,1} */
+
+	i__1 = *k - l;
+	ztrmm_("Left", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b3, &
+		t[*k - l + 1 + (*k - l + 1) * t_dim1], ldt, &t[*k - l + 1 + 
+		t_dim1], ldt);
+
+/*        T_{2,1} = T_{2,1}*T_{1,1} */
+
+	i__1 = *k - l;
+	ztrmm_("Right", "Lower", "No tranpose", "Non-unit", &l, &i__1, &c_b1, 
+		&t[t_offset], ldt, &t[*k - l + 1 + t_dim1], ldt);
+    }
+    return 0;
 } /* zlarft_ */
 
diff --git a/lapack-netlib/SRC/zlarft.f b/lapack-netlib/SRC/zlarft.f
index 900795afad..efd52037d3 100644
--- a/lapack-netlib/SRC/zlarft.f
+++ b/lapack-netlib/SRC/zlarft.f
@@ -5,7 +5,6 @@
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
-*> \htmlonly
 *> Download ZLARFT + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f">
 *> [TGZ]</a>
@@ -13,7 +12,6 @@
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.f">
 *> [TXT]</a>
-*> \endhtmlonly
 *
 *  Definition:
 *  ===========
@@ -161,6 +159,7 @@
 *  =====================================================================
       RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
      $                             TAU, T, LDT )
+      IMPLICIT NONE
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
@@ -179,11 +178,12 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
 *     .. Parameters ..
 *
       COMPLEX*16        ONE, NEG_ONE, ZERO
-      PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0)
+      PARAMETER(ONE=(1.0D+0,0.0D+0), ZERO = (0.0D+0,0.0D+0),
+     $            NEG_ONE=(-1.0D+0,0.0D+0))
 *
 *     .. Local Scalars ..
 *
-      INTEGER           I,J,L
+      INTEGER           I,J,L,NX
       LOGICAL           QR,LQ,QL,DIRF,COLV
 *
 *     .. External Subroutines ..
@@ -193,7 +193,8 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
 *     .. External Functions..
 *
       LOGICAL           LSAME
-      EXTERNAL          LSAME
+      INTEGER           ILAENV
+      EXTERNAL          LSAME, ILAENV
 *
 *     .. Intrinsic Functions..
 *
@@ -219,6 +220,14 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV,
          RETURN
       END IF
 *
+*     Determine when to cross over into the level 2 based implementation
+*
+      NX = ILAENV(3, "ZLARFT", DIRECT // STOREV, N, K, -1, -1)
+      IF(K.LT.NX) THEN
+         CALL ZLARFT_LVL2(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
+         RETURN
+      END IF
+*
 *     Beginning of executable statements
 *
       L = K / 2
diff --git a/lapack-netlib/SRC/zlarft_lvl2.c b/lapack-netlib/SRC/zlarft_lvl2.c
new file mode 100644
index 0000000000..193870627b
--- /dev/null
+++ b/lapack-netlib/SRC/zlarft_lvl2.c
@@ -0,0 +1,941 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+#if defined(_WIN64)
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+#ifdef LAPACK_ILP64
+typedef BLASLONG blasint;
+#if defined(_WIN64)
+#define blasabs(x) llabs(x)
+#else
+#define blasabs(x) labs(x)
+#endif
+#else
+typedef int blasint;
+#define blasabs(x) abs(x)
+#endif
+
+typedef blasint integer;
+
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+#ifdef _MSC_VER
+static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
+static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
+static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
+static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
+#else
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#endif
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef blasint logical;
+
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#ifdef _MSC_VER
+#define c_div(c, a, b) {float nenn=crealf(_FCmulcc(Cf(b),conjf(Cf(b)))); _Fcomplex zaehl=_FCmulcc(Cf(a),conjf(Cf(b))); pCf(c)=_FCbuild(crealf(zaehl)/nenn,cimagf(zaehl)/nenn);}
+#define z_div(c, a, b) {double nenn=creal(_Cmulcc(Cd(b),conj(Cd(b)))); _Dcomplex zaehl=_Cmulcc(Cd(a),conj(Cd(b))); pCd(c)=_Cbuild(creal(zaehl)/nenn,cimag(zaehl)/nenn);}
+#else
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#endif
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#ifdef _MSC_VER
+static _Fcomplex cpow_ui(complex x, integer n) {
+	complex pow={1.0,0.0}; unsigned long int u;
+		if(n != 0) {
+		if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
+		for(u = n; ; ) {
+			if(u & 01) pow.r *= x.r, pow.i *= x.i;
+			if(u >>= 1) x.r *= x.r, x.i *= x.i;
+			else break;
+		}
+	}
+	_Fcomplex p={pow.r, pow.i};
+	return p;
+}
+#else
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+#ifdef _MSC_VER
+static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
+	_Dcomplex pow={1.0,0.0}; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
+		for(u = n; ; ) {
+			if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
+			if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
+			else break;
+		}
+	}
+	_Dcomplex p = {pow._Val[0], pow._Val[1]};
+	return p;
+}
+#else
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+#endif
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				+ Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i])._Val[1] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				+ Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] -= Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				- Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1]
+				- Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0]
+				+ Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1]
+				- Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Fcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0]
+				- Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
+			zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1]
+				+ Cf(&x[i])._Val[0] * Cf(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0]
+				- Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1]
+				+ Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[1];
+		}
+	}
+	pCf(z) = zdotc;
+}
+#else
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+#endif
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+#ifdef _MSC_VER
+	_Dcomplex zdotc = {0.0, 0.0};
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0]
+				- Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
+			zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1]
+				+ Cd(&x[i])._Val[0] * Cd(&y[i])._Val[1];
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0]
+				- Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
+			zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1]
+				+ Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[1];
+		}
+	}
+	pCd(z) = zdotc;
+}
+#else
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+/* Table of constant values */
+
+static doublecomplex c_b1 = {1.,0.};
+static integer c__1 = 1;
+
+/* > \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > Download ZLARFT_LVL2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.
+f"> */
+/* > [TXT]</a> */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU, */
+/*                    T, LDT ) */
+
+/*       CHARACTER          DIRECT, STOREV */
+/*       INTEGER            K, LDT, LDV, N */
+/*       COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H */
+/* > of order n, which is defined as a product of k elementary reflectors. */
+/* > */
+/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
+/* > */
+/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
+/* > */
+/* > If STOREV = 'C', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th column of the array V, and */
+/* > */
+/* >    H  =  I - V * T * V**H */
+/* > */
+/* > If STOREV = 'R', the vector which defines the elementary reflector */
+/* > H(i) is stored in the i-th row of the array V, and */
+/* > */
+/* >    H  =  I - V**H * T * V */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] DIRECT */
+/* > \verbatim */
+/* >          DIRECT is CHARACTER*1 */
+/* >          Specifies the order in which the elementary reflectors are */
+/* >          multiplied to form the block reflector: */
+/* >          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
+/* >          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] STOREV */
+/* > \verbatim */
+/* >          STOREV is CHARACTER*1 */
+/* >          Specifies how the vectors which define the elementary */
+/* >          reflectors are stored (see also Further Details): */
+/* >          = 'C': columnwise */
+/* >          = 'R': rowwise */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the block reflector H. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The order of the triangular factor T (= the number of */
+/* >          elementary reflectors). K >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is COMPLEX*16 array, dimension */
+/* >                               (LDV,K) if STOREV = 'C' */
+/* >                               (LDV,N) if STOREV = 'R' */
+/* >          The matrix V. See further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If STOREV = 'C', LDV >= f2cmax(1,N); if STOREV = 'R', LDV >= K. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TAU */
+/* > \verbatim */
+/* >          TAU is COMPLEX*16 array, dimension (K) */
+/* >          TAU(i) must contain the scalar factor of the elementary */
+/* >          reflector H(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is COMPLEX*16 array, dimension (LDT,K) */
+/* >          The k by k triangular factor T of the block reflector. */
+/* >          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
+/* >          lower triangular. The rest of the array is not used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T. LDT >= K. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup larft */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The shape of the matrix V and the storage of the vectors which define */
+/* >  the H(i) is best illustrated by the following example with n = 5 and */
+/* >  k = 3. The elements equal to 1 are not stored. */
+/* > */
+/* >  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
+/* > */
+/* >               V = (  1       )                 V = (  1 v1 v1 v1 v1 ) */
+/* >                   ( v1  1    )                     (     1 v2 v2 v2 ) */
+/* >                   ( v1 v2  1 )                     (        1 v3 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
+/* > */
+/* >               V = ( v1 v2 v3 )                 V = ( v1 v1  1       ) */
+/* >                   ( v1 v2 v3 )                     ( v2 v2 v2  1    ) */
+/* >                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 ) */
+/* >                   (     1 v3 ) */
+/* >                   (        1 ) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int zlarft_lvl2__(char *direct, char *storev, integer *n, 
+	integer *k, doublecomplex *v, integer *ldv, doublecomplex *tau, 
+	doublecomplex *t, integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Local variables */
+    integer i__, j, prevlastv;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublecomplex *, doublecomplex *, integer *, 
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
+	    integer *), zgemv_(char *, integer *, integer *, 
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+    integer lastv;
+    extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, 
+	    doublecomplex *, integer *, doublecomplex *, integer *), mecago_();
+
+
+/*  -- LAPACK auxiliary routine -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/*  ===================================================================== */
+
+
+/*     Quick return if possible */
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	prevlastv = *n;
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    prevlastv = f2cmax(prevlastv,i__);
+	    i__2 = i__;
+	    if (tau[i__2].r == 0. && tau[i__2].i == 0.) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    i__3 = j + i__ * t_dim1;
+		    t[i__3].r = 0., t[i__3].i = 0.;
+		}
+	    } else {
+
+/*              general case */
+
+		if (lsame_(storev, "C")) {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			i__3 = lastv + i__ * v_dim1;
+			if (v[i__3].r != 0. || v[i__3].i != 0.) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			i__3 = j + i__ * t_dim1;
+			i__4 = i__;
+			z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
+			d_cnjg(&z__3, &v[i__ + j * v_dim1]);
+			z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = 
+				z__2.r * z__3.i + z__2.i * z__3.r;
+			t[i__3].r = z__1.r, t[i__3].i = z__1.i;
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */
+
+		    i__2 = j - i__;
+		    i__3 = i__ - 1;
+		    i__4 = i__;
+		    z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
+		    zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ 
+			    + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &
+			    c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1);
+		} else {
+/*                 Skip any trailing zeros. */
+		    i__2 = i__ + 1;
+		    for (lastv = *n; lastv >= i__2; --lastv) {
+			i__3 = i__ + lastv * v_dim1;
+			if (v[i__3].r != 0. || v[i__3].i != 0.) {
+			    myexit_();
+			}
+		    }
+		    i__2 = i__ - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			i__3 = j + i__ * t_dim1;
+			i__4 = i__;
+			z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i;
+			i__5 = j + i__ * v_dim1;
+			z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i, 
+				z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5]
+				.r;
+			t[i__3].r = z__1.r, t[i__3].i = z__1.i;
+		    }
+		    j = f2cmin(lastv,prevlastv);
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */
+
+		    i__2 = i__ - 1;
+		    i__3 = j - i__;
+		    i__4 = i__;
+		    z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
+		    zgemm_("N", "C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1) 
+			    * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], 
+			    ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt);
+		}
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		i__2 = i__ + i__ * t_dim1;
+		i__3 = i__;
+		t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+		if (i__ > 1) {
+		    prevlastv = f2cmax(prevlastv,lastv);
+		} else {
+		    prevlastv = lastv;
+		}
+	    }
+	}
+    } else {
+	prevlastv = 1;
+	for (i__ = *k; i__ >= 1; --i__) {
+	    i__1 = i__;
+	    if (tau[i__1].r == 0. && tau[i__1].i == 0.) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    i__2 = j + i__ * t_dim1;
+		    t[i__2].r = 0., t[i__2].i = 0.;
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    i__2 = lastv + i__ * v_dim1;
+			    if (v[i__2].r != 0. || v[i__2].i != 0.) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    i__2 = j + i__ * t_dim1;
+			    i__3 = i__;
+			    z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
+			    d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]);
+			    z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, 
+				    z__1.i = z__2.r * z__3.i + z__2.i * 
+				    z__3.r;
+			    t[i__2].r = z__1.r, t[i__2].i = z__1.i;
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */
+
+			i__1 = *n - *k + i__ - j;
+			i__2 = *k - i__;
+			i__3 = i__;
+			z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+			zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[
+				j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * 
+				v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * 
+				t_dim1], &c__1);
+		    } else {
+/*                    Skip any leading zeros. */
+			i__1 = i__ - 1;
+			for (lastv = 1; lastv <= i__1; ++lastv) {
+			    i__2 = i__ + lastv * v_dim1;
+			    if (v[i__2].r != 0. || v[i__2].i != 0.) {
+				myexit_();
+			    }
+			}
+			i__1 = *k;
+			for (j = i__ + 1; j <= i__1; ++j) {
+			    i__2 = j + i__ * t_dim1;
+			    i__3 = i__;
+			    z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i;
+			    i__4 = j + (*n - *k + i__) * v_dim1;
+			    z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i, 
+				    z__1.i = z__2.r * v[i__4].i + z__2.i * v[
+				    i__4].r;
+			    t[i__2].r = z__1.r, t[i__2].i = z__1.i;
+			}
+			j = f2cmax(lastv,prevlastv);
+
+/*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */
+
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__ - j;
+			i__3 = i__;
+			z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+			zgemm_("N", "C", &i__1, &c__1, &i__2, &z__1, &v[i__ + 
+				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], 
+				ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt);
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ 
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		    if (i__ > 1) {
+			prevlastv = f2cmin(prevlastv,lastv);
+		    } else {
+			prevlastv = lastv;
+		    }
+		}
+		i__1 = i__ + i__ * t_dim1;
+		i__2 = i__;
+		t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+	    }
+	}
+    }
+    return 0;
+
+/*     End of ZLARFT_LVL2 */
+
+} /* zlarft_lvl2__ */
+
diff --git a/lapack-netlib/SRC/zlarft_lvl2.f b/lapack-netlib/SRC/zlarft_lvl2.f
new file mode 100644
index 0000000000..808c7fdb25
--- /dev/null
+++ b/lapack-netlib/SRC/zlarft_lvl2.f
@@ -0,0 +1,327 @@
+*> \brief \b ZLARFT_LVL2: Level 2 BLAS version for terminating case of ZLARFT.
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> Download ZLARFT_LVL2 + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.f">
+*> [TXT]</a>
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+*                    T, LDT )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          DIRECT, STOREV
+*       INTEGER            K, LDT, LDV, N
+*       ..
+*       .. Array Arguments ..
+*       COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> ZLARFT_LVL2 forms the triangular factor T of a complex block reflector H
+*> of order n, which is defined as a product of k elementary reflectors.
+*>
+*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*>
+*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*>
+*> If STOREV = 'C', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th column of the array V, and
+*>
+*>    H  =  I - V * T * V**H
+*>
+*> If STOREV = 'R', the vector which defines the elementary reflector
+*> H(i) is stored in the i-th row of the array V, and
+*>
+*>    H  =  I - V**H * T * V
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] DIRECT
+*> \verbatim
+*>          DIRECT is CHARACTER*1
+*>          Specifies the order in which the elementary reflectors are
+*>          multiplied to form the block reflector:
+*>          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*>          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*> \endverbatim
+*>
+*> \param[in] STOREV
+*> \verbatim
+*>          STOREV is CHARACTER*1
+*>          Specifies how the vectors which define the elementary
+*>          reflectors are stored (see also Further Details):
+*>          = 'C': columnwise
+*>          = 'R': rowwise
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The order of the block reflector H. N >= 0.
+*> \endverbatim
+*>
+*> \param[in] K
+*> \verbatim
+*>          K is INTEGER
+*>          The order of the triangular factor T (= the number of
+*>          elementary reflectors). K >= 1.
+*> \endverbatim
+*>
+*> \param[in] V
+*> \verbatim
+*>          V is COMPLEX*16 array, dimension
+*>                               (LDV,K) if STOREV = 'C'
+*>                               (LDV,N) if STOREV = 'R'
+*>          The matrix V. See further details.
+*> \endverbatim
+*>
+*> \param[in] LDV
+*> \verbatim
+*>          LDV is INTEGER
+*>          The leading dimension of the array V.
+*>          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*> \endverbatim
+*>
+*> \param[in] TAU
+*> \verbatim
+*>          TAU is COMPLEX*16 array, dimension (K)
+*>          TAU(i) must contain the scalar factor of the elementary
+*>          reflector H(i).
+*> \endverbatim
+*>
+*> \param[out] T
+*> \verbatim
+*>          T is COMPLEX*16 array, dimension (LDT,K)
+*>          The k by k triangular factor T of the block reflector.
+*>          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*>          lower triangular. The rest of the array is not used.
+*> \endverbatim
+*>
+*> \param[in] LDT
+*> \verbatim
+*>          LDT is INTEGER
+*>          The leading dimension of the array T. LDT >= K.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup larft
+*
+*> \par Further Details:
+*  =====================
+*>
+*> \verbatim
+*>
+*>  The shape of the matrix V and the storage of the vectors which define
+*>  the H(i) is best illustrated by the following example with n = 5 and
+*>  k = 3. The elements equal to 1 are not stored.
+*>
+*>  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*>
+*>               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*>                   ( v1  1    )                     (     1 v2 v2 v2 )
+*>                   ( v1 v2  1 )                     (        1 v3 v3 )
+*>                   ( v1 v2 v3 )
+*>                   ( v1 v2 v3 )
+*>
+*>  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*>
+*>               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*>                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*>                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*>                   (     1 v3 )
+*>                   (        1 )
+*> \endverbatim
+*>
+*  =====================================================================
+      SUBROUTINE ZLARFT_LVL2( DIRECT, STOREV, N, K, V, LDV, TAU,
+     $            T, LDT )
+*
+*  -- LAPACK auxiliary routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, PREVLASTV, LASTV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMV, ZTRMV, ZGEMM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         PREVLASTV = N
+         DO I = 1, K
+            PREVLASTV = MAX( PREVLASTV, I )
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = 1, I
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( LASTV, I ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
+*
+                  CALL ZGEMV( 'Conjugate transpose', J-I, I-1,
+     $                        -TAU( I ), V( I+1, 1 ), LDV,
+     $                        V( I+1, I ), 1, ONE, T( 1, I ), 1 )
+               ELSE
+*                 Skip any trailing zeros.
+                  DO LASTV = N, I+1, -1
+                     IF( V( I, LASTV ).NE.ZERO ) EXIT
+                  END DO
+                  DO J = 1, I-1
+                     T( J, I ) = -TAU( I ) * V( J , I )
+                  END DO
+                  J = MIN( LASTV, PREVLASTV )
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
+*
+                  CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
+     $                        V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
+     $                        ONE, T( 1, I ), LDT )
+               END IF
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
+     $                     T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+               IF( I.GT.1 ) THEN
+                  PREVLASTV = MAX( PREVLASTV, LASTV )
+               ELSE
+                  PREVLASTV = LASTV
+               END IF
+             END IF
+         END DO
+      ELSE
+         PREVLASTV = 1
+         DO I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO J = I, K
+                  T( J, I ) = ZERO
+               END DO
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( LASTV, I ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
+*
+                     CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I,
+     $                           -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
+     $                           1, ONE, T( I+1, I ), 1 )
+                  ELSE
+*                    Skip any leading zeros.
+                     DO LASTV = 1, I-1
+                        IF( V( I, LASTV ).NE.ZERO ) EXIT
+                     END DO
+                     DO J = I+1, K
+                        T( J, I ) = -TAU( I ) * V( J, N-K+I )
+                     END DO
+                     J = MAX( LASTV, PREVLASTV )
+*
+*                    T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
+*
+                     CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J,
+     $                           -TAU( I ),
+     $                           V( I+1, J ), LDV, V( I, J ), LDV,
+     $                           ONE, T( I+1, I ), LDT )
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit',
+     $                        K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+                  IF( I.GT.1 ) THEN
+                     PREVLASTV = MIN( PREVLASTV, LASTV )
+                  ELSE
+                     PREVLASTV = LASTV
+                  END IF
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+         END DO
+      END IF
+      RETURN
+*
+*     End of ZLARFT_LVL2
+*
+      END